From c8d687b05c803e3b358014e7f729a5700a003552 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Mon, 7 Nov 2022 16:28:15 -0800 Subject: [PATCH 001/492] dev-amdgpu: Fix SDMA ring buffer wrap around The current SDMA wrap around handling only considers the ring buffer location as seen by the GPU. Eventually when the end of the SDMA ring buffer is reached, the driver waits until the rptr written back to the host catches up to what the driver sees before wrapping around back to the beginning of the buffer. This writeback currently does not happen at all, causing hangs for applications with a lot of SDMA commands. This changeset first fixes the sizes of the queues, especially RLC queues, so that the wrap around occurs in the correct place. Second, we now store the rptr writeback address and the absolute (unwrapped) rptr value in each SDMA queue. The absolute rptr is what the driver sends to the device and what it expects to be written back. This was tested with an application which basically does a few hundred thousand hipMemcpy() calls in a loop. It should also fix the issue with pannotia BC in fullsystem mode. 
Change-Id: I53ebdcc6b02fb4eb4da435c9a509544066a97069 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65351 Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Jason Lowe-Power Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair --- src/dev/amdgpu/pm4_packet_processor.cc | 14 ++++++++---- src/dev/amdgpu/sdma_engine.cc | 30 ++++++++++++++++++++------ src/dev/amdgpu/sdma_engine.hh | 18 ++++++++++++---- 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc index 404beab16c..4f98f18d16 100644 --- a/src/dev/amdgpu/pm4_packet_processor.cc +++ b/src/dev/amdgpu/pm4_packet_processor.cc @@ -441,12 +441,17 @@ void PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, SDMAQueueDesc *mqd, uint16_t vmid) { + uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1); + Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi; + rptr_wb_addr <<= 32; + rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo; + DPRINTF(PM4PacketProcessor, "SDMAMQD: rb base: %#lx rptr: %#x/%#x wptr: " - "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x\n", mqd->rb_base, - mqd->sdmax_rlcx_rb_rptr, mqd->sdmax_rlcx_rb_rptr_hi, + "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x rptr wb addr: %#lx\n", + mqd->rb_base, mqd->sdmax_rlcx_rb_rptr, mqd->sdmax_rlcx_rb_rptr_hi, mqd->sdmax_rlcx_rb_wptr, mqd->sdmax_rlcx_rb_wptr_hi, mqd->sdmax_rlcx_ib_base_lo, mqd->sdmax_rlcx_ib_base_hi, - mqd->sdmax_rlcx_ib_size, mqd->sdmax_rlcx_rb_cntl); + rlc_size, mqd->sdmax_rlcx_rb_cntl, rptr_wb_addr); // Engine 2 points to SDMA0 while engine 3 points to SDMA1 assert(pkt->engineSel == 2 || pkt->engineSel == 3); @@ -454,7 +459,8 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, // Register RLC queue with SDMA sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, - mqd->rb_base << 8); + mqd->rb_base << 8, rlc_size, + rptr_wb_addr); // Register doorbell with GPU device 
gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng); diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc index e9a4c176d8..59c5027c85 100644 --- a/src/dev/amdgpu/sdma_engine.cc +++ b/src/dev/amdgpu/sdma_engine.cc @@ -161,7 +161,8 @@ SDMAEngine::translate(Addr vaddr, Addr size) } void -SDMAEngine::registerRLCQueue(Addr doorbell, Addr rb_base) +SDMAEngine::registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size, + Addr rptr_wb_addr) { // Get first free RLC if (!rlc0.valid()) { @@ -171,19 +172,19 @@ SDMAEngine::registerRLCQueue(Addr doorbell, Addr rb_base) rlc0.base(rb_base); rlc0.rptr(0); rlc0.wptr(0); + rlc0.rptrWbAddr(rptr_wb_addr); rlc0.processing(false); - // TODO: size - I think pull from MQD 2^rb_cntrl[6:1]-1 - rlc0.size(1024*1024); + rlc0.size(size); } else if (!rlc1.valid()) { DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell); rlcInfo[1] = doorbell; rlc1.valid(true); rlc1.base(rb_base); - rlc1.rptr(1); - rlc1.wptr(1); + rlc1.rptr(0); + rlc1.wptr(0); + rlc1.rptrWbAddr(rptr_wb_addr); rlc1.processing(false); - // TODO: size - I think pull from MQD 2^rb_cntrl[6:1]-1 - rlc1.size(1024*1024); + rlc1.size(size); } else { panic("No free RLCs. Check they are properly unmapped."); } @@ -291,6 +292,17 @@ SDMAEngine::decodeNext(SDMAQueue *q) { decodeHeader(q, header); }); dmaReadVirt(q->rptr(), sizeof(uint32_t), cb, &cb->dmaBuffer); } else { + // The driver expects the rptr to be written back to host memory + // periodically. In simulation, we writeback rptr after each burst of + // packets from a doorbell, rather than using the cycle count which + // is not accurate in all simulation settings (e.g., KVM). 
+ DPRINTF(SDMAEngine, "Writing rptr %#lx back to host addr %#lx\n", + q->globalRptr(), q->rptrWbAddr()); + if (q->rptrWbAddr()) { + auto cb = new DmaVirtCallback( + [ = ](const uint64_t &) { }, q->globalRptr()); + dmaWriteVirt(q->rptrWbAddr(), sizeof(Addr), cb, &cb->dmaBuffer); + } q->processing(false); if (q->parent()) { DPRINTF(SDMAEngine, "SDMA switching queues\n"); @@ -1158,6 +1170,7 @@ SDMAEngine::setGfxRptrLo(uint32_t data) { gfxRptr = insertBits(gfxRptr, 31, 0, 0); gfxRptr |= data; + gfx.rptrWbAddr(getGARTAddr(gfxRptr)); } void @@ -1165,6 +1178,7 @@ SDMAEngine::setGfxRptrHi(uint32_t data) { gfxRptr = insertBits(gfxRptr, 63, 32, 0); gfxRptr |= ((uint64_t)data) << 32; + gfx.rptrWbAddr(getGARTAddr(gfxRptr)); } void @@ -1236,6 +1250,7 @@ SDMAEngine::setPageRptrLo(uint32_t data) { pageRptr = insertBits(pageRptr, 31, 0, 0); pageRptr |= data; + page.rptrWbAddr(getGARTAddr(pageRptr)); } void @@ -1243,6 +1258,7 @@ SDMAEngine::setPageRptrHi(uint32_t data) { pageRptr = insertBits(pageRptr, 63, 32, 0); pageRptr |= ((uint64_t)data) << 32; + page.rptrWbAddr(getGARTAddr(pageRptr)); } void diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh index 6fe7a8e565..d0afaf7a4a 100644 --- a/src/dev/amdgpu/sdma_engine.hh +++ b/src/dev/amdgpu/sdma_engine.hh @@ -58,6 +58,8 @@ class SDMAEngine : public DmaVirtDevice Addr _rptr; Addr _wptr; Addr _size; + Addr _rptr_wb_addr = 0; + Addr _global_rptr = 0; bool _valid; bool _processing; SDMAQueue *_parent; @@ -72,6 +74,8 @@ class SDMAEngine : public DmaVirtDevice Addr wptr() { return _base + _wptr; } Addr getWptr() { return _wptr; } Addr size() { return _size; } + Addr rptrWbAddr() { return _rptr_wb_addr; } + Addr globalRptr() { return _global_rptr; } bool valid() { return _valid; } bool processing() { return _processing; } SDMAQueue* parent() { return _parent; } @@ -82,22 +86,27 @@ class SDMAEngine : public DmaVirtDevice void incRptr(uint32_t value) { - //assert((_rptr + value) <= (_size << 1)); _rptr = (_rptr + 
value) % _size; + _global_rptr += value; } - void rptr(Addr value) { _rptr = value; } + void + rptr(Addr value) + { + _rptr = value; + _global_rptr = value; + } void setWptr(Addr value) { - //assert(value <= (_size << 1)); _wptr = value % _size; } void wptr(Addr value) { _wptr = value; } void size(Addr value) { _size = value; } + void rptrWbAddr(Addr value) { _rptr_wb_addr = value; } void valid(bool v) { _valid = v; } void processing(bool value) { _processing = value; } void parent(SDMAQueue* q) { _parent = q; } @@ -268,7 +277,8 @@ class SDMAEngine : public DmaVirtDevice /** * Methods for RLC queues */ - void registerRLCQueue(Addr doorbell, Addr rb_base); + void registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size, + Addr rptr_wb_addr); void unregisterRLCQueue(Addr doorbell); void deallocateRLCQueues(); From 90046bae6f14b4bf94fc450efc617614b9214f92 Mon Sep 17 00:00:00 2001 From: handsomeliu Date: Tue, 8 Nov 2022 15:39:28 +0800 Subject: [PATCH 002/492] systemc: Add the stream id entry and its conversion in control extension stream id and substream id are properties of gem5 Request. This CL adds the information into gem5 ControlExtension to manipulate them in SystemC level, and adds the conversion between ControlExtension and Packet. 
Change-Id: Id13d181561ba496c2012f7237eb800f0a9786d05 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65371 Maintainer: Bobby Bruce Tested-by: kokoro Reviewed-by: Yu-hsin Wang --- src/systemc/tlm_bridge/sc_ext.cc | 54 +++++++++++++++++++++++++++++++- src/systemc/tlm_bridge/sc_ext.hh | 13 ++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/src/systemc/tlm_bridge/sc_ext.cc b/src/systemc/tlm_bridge/sc_ext.cc index 4d12fb3d9f..6e3cf113d3 100644 --- a/src/systemc/tlm_bridge/sc_ext.cc +++ b/src/systemc/tlm_bridge/sc_ext.cc @@ -33,6 +33,8 @@ #include "systemc/tlm_bridge/sc_ext.hh" +#include + #include "systemc/ext/utils/sc_report_handler.hh" #include "systemc/tlm_bridge/gem5_to_tlm.hh" #include "systemc/tlm_bridge/tlm_to_gem5.hh" @@ -76,6 +78,14 @@ struct ControlConversionRegister } pkt->qosValue(control_ex->getQos()); + + if (control_ex->hasStreamId()) { + pkt->req->setStreamId(control_ex->getStreamId().value()); + } + if (control_ex->hasSubstreamId()) { + pkt->req->setSubstreamId( + control_ex->getSubstreamId().value()); + } }); sc_gem5::addPacketToPayloadConversionStep( [] (PacketPtr pkt, tlm::tlm_generic_payload &trans) @@ -90,6 +100,12 @@ struct ControlConversionRegister control_ex->setSecure(pkt->req->isSecure()); control_ex->setInstruction(pkt->req->isInstFetch()); control_ex->setQos(pkt->qosValue()); + if (pkt->req->hasStreamId()) { + control_ex->setStreamId(pkt->req->streamId()); + } + if (pkt->req->hasSubstreamId()) { + control_ex->setSubstreamId(pkt->req->substreamId()); + } }); } }; @@ -263,4 +279,40 @@ ControlExtension::setQos(uint8_t q) qos = q; } -} // namespace Gem5SystemC +bool +ControlExtension::hasStreamId() const +{ + return stream_id.has_value(); +} + +std::optional +ControlExtension::getStreamId() const +{ + return stream_id; +} + +void +ControlExtension::setStreamId(std::optional s) +{ + stream_id = std::move(s); +} + +bool +ControlExtension::hasSubstreamId() const +{ + return substream_id.has_value(); +} + 
+std::optional +ControlExtension::getSubstreamId() const +{ + return substream_id; +} + +void +ControlExtension::setSubstreamId(std::optional s) +{ + substream_id = std::move(s); +} + +} // namespace Gem5SystemC diff --git a/src/systemc/tlm_bridge/sc_ext.hh b/src/systemc/tlm_bridge/sc_ext.hh index bb676761ce..f23f3fa54d 100644 --- a/src/systemc/tlm_bridge/sc_ext.hh +++ b/src/systemc/tlm_bridge/sc_ext.hh @@ -36,6 +36,7 @@ #include #include +#include #include "base/amo.hh" #include "mem/packet.hh" @@ -115,6 +116,14 @@ class ControlExtension : public tlm::tlm_extension uint8_t getQos() const; void setQos(uint8_t q); + /* Stream ID and Substream ID */ + bool hasStreamId() const; + std::optional getStreamId() const; + void setStreamId(std::optional s); + bool hasSubstreamId() const; + std::optional getSubstreamId() const; + void setSubstreamId(std::optional s); + private: /* Secure and privileged access */ bool privileged; @@ -123,6 +132,10 @@ class ControlExtension : public tlm::tlm_extension /* Quality of Service (AXI4) */ uint8_t qos; + + /* Stream ID and Substream ID */ + std::optional stream_id; + std::optional substream_id; }; } // namespace Gem5SystemC From 623e2d3dac3e75c67b4e1b8f6a7113f0ab376960 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Tue, 8 Nov 2022 14:24:32 -0800 Subject: [PATCH 003/492] dev-amdgpu: Handle ring buffer wrap for PM4 queue Change-Id: I27bc274327838add709423b072d437c4e727a714 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65431 Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/dev/amdgpu/pm4_mmio.hh | 1 + src/dev/amdgpu/pm4_packet_processor.cc | 13 +++++++++++-- src/dev/amdgpu/pm4_packet_processor.hh | 1 + src/dev/amdgpu/pm4_queues.hh | 7 +++++-- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/dev/amdgpu/pm4_mmio.hh b/src/dev/amdgpu/pm4_mmio.hh index a3ce5f14e5..3801223175 100644 --- a/src/dev/amdgpu/pm4_mmio.hh +++ b/src/dev/amdgpu/pm4_mmio.hh @@ -60,6 +60,7 @@ 
namespace gem5 #define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI 0x1251 #define mmCP_HQD_PQ_WPTR_POLL_ADDR 0x1252 #define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI 0x1253 +#define mmCP_HQD_PQ_CONTROL 0x1256 #define mmCP_HQD_IB_CONTROL 0x125a #define mmCP_HQD_PQ_WPTR_LO 0x127b #define mmCP_HQD_PQ_WPTR_HI 0x127c diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc index 4f98f18d16..f78f8333a6 100644 --- a/src/dev/amdgpu/pm4_packet_processor.cc +++ b/src/dev/amdgpu/pm4_packet_processor.cc @@ -147,8 +147,8 @@ PM4PacketProcessor::newQueue(QueueDesc *mqd, Addr offset, gpuDevice->setDoorbellType(offset, qt); DPRINTF(PM4PacketProcessor, "New PM4 queue %d, base: %p offset: %p, me: " - "%d, pipe %d queue: %d\n", id, q->base(), q->offset(), q->me(), - q->pipe(), q->queue()); + "%d, pipe %d queue: %d size: %d\n", id, q->base(), q->offset(), + q->me(), q->pipe(), q->queue(), q->size()); } void @@ -790,6 +790,9 @@ PM4PacketProcessor::writeMMIO(PacketPtr pkt, Addr mmio_offset) case mmCP_HQD_PQ_WPTR_POLL_ADDR_HI: setHqdPqWptrPollAddrHi(pkt->getLE()); break; + case mmCP_HQD_PQ_CONTROL: + setHqdPqControl(pkt->getLE()); + break; case mmCP_HQD_IB_CONTROL: setHqdIbCtrl(pkt->getLE()); break; @@ -911,6 +914,12 @@ PM4PacketProcessor::setHqdPqWptrPollAddrHi(uint32_t data) kiq.hqd_pq_wptr_poll_addr_hi = data; } +void +PM4PacketProcessor::setHqdPqControl(uint32_t data) +{ + kiq.hqd_pq_control = data; +} + void PM4PacketProcessor::setHqdIbCtrl(uint32_t data) { diff --git a/src/dev/amdgpu/pm4_packet_processor.hh b/src/dev/amdgpu/pm4_packet_processor.hh index 48066713a5..4617a21a06 100644 --- a/src/dev/amdgpu/pm4_packet_processor.hh +++ b/src/dev/amdgpu/pm4_packet_processor.hh @@ -171,6 +171,7 @@ class PM4PacketProcessor : public DmaVirtDevice void setHqdPqRptrReportAddrHi(uint32_t data); void setHqdPqWptrPollAddr(uint32_t data); void setHqdPqWptrPollAddrHi(uint32_t data); + void setHqdPqControl(uint32_t data); void setHqdIbCtrl(uint32_t data); void setRbVmid(uint32_t data); 
void setRbCntl(uint32_t data); diff --git a/src/dev/amdgpu/pm4_queues.hh b/src/dev/amdgpu/pm4_queues.hh index 19973b113e..8b6626d176 100644 --- a/src/dev/amdgpu/pm4_queues.hh +++ b/src/dev/amdgpu/pm4_queues.hh @@ -396,14 +396,14 @@ class PM4Queue rptr() { if (ib()) return q->ibBase + q->ibRptr; - else return q->base + q->rptr; + else return q->base + (q->rptr % size()); } Addr wptr() { if (ib()) return q->ibBase + _ibWptr; - else return q->base + _wptr; + else return q->base + (_wptr % size()); } Addr @@ -470,6 +470,9 @@ class PM4Queue uint32_t pipe() { return _pkt.pipe; } uint32_t queue() { return _pkt.queueSlot; } bool privileged() { return _pkt.queueSel == 0 ? 1 : 0; } + + // Same computation as processMQD. See comment there for details. + uint64_t size() { return 4UL << ((q->hqd_pq_control & 0x3f) + 1); } }; } // namespace gem5 From 8693d725e202002893aafc4ac814bfa87c86ae76 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Tue, 8 Nov 2022 19:58:07 -0800 Subject: [PATCH 004/492] arch-vega: Fix SOPK instruction sign extends See: https://gem5-review.googlesource.com/c/public/gem5/+/37495 Same patch but for vega. This fixes issues with lulesh and probably rodinia - heartwall as well in fullsystem. 
Change-Id: I3af36bb9b60d32dc96cc3b439bb1167be1b0945d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65432 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- src/arch/amdgpu/vega/insts/instructions.cc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index 76bb8aad49..f5b08b7ce1 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -1553,7 +1553,7 @@ namespace VegaISA void Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst) { - ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ScalarOperandI32 sdst(gpuDynInst, instData.SDST); sdst = simm16; @@ -1579,7 +1579,7 @@ namespace VegaISA void Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst) { - ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ScalarOperandI32 sdst(gpuDynInst, instData.SDST); ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1607,7 +1607,7 @@ namespace VegaISA void Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst) { - ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1634,7 +1634,7 @@ namespace VegaISA void Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst) { - ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1661,7 +1661,7 @@ namespace VegaISA void Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst) { - ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; + ScalarRegI32 simm16 = 
(ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1688,7 +1688,7 @@ namespace VegaISA void Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst) { - ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1715,7 +1715,7 @@ namespace VegaISA void Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst) { - ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1742,7 +1742,7 @@ namespace VegaISA void Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst) { - ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); ConstScalarOperandI32 src(gpuDynInst, instData.SDST); ScalarOperandU32 scc(gpuDynInst, REG_SCC); @@ -1939,7 +1939,7 @@ namespace VegaISA src.read(); - sdst = src.rawData() + (ScalarRegI32)simm16; + sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16); scc = (bits(src.rawData(), 31) == bits(simm16, 15) && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; @@ -1969,7 +1969,7 @@ namespace VegaISA src.read(); - sdst = src.rawData() * (ScalarRegI32)simm16; + sdst = src.rawData() * (ScalarRegI32)sext<16>(simm16); sdst.write(); } // execute From 78b978686c5195fa9b4574c1285649872ea34a3e Mon Sep 17 00:00:00 2001 From: Jasjeet Rangi Date: Mon, 7 Nov 2022 15:09:24 -0800 Subject: [PATCH 005/492] stdlib: Fix get_isa_from_str() exception behavior in isas.py When given an input string that does not match any valid ISA, the get_isa_from_str() function should call get_isas_str_set() to print the valid ISA strings in the exception. 
The current behavior is to recursively call get_isa_from_str() with no input, which prevents the correct exception from being raised. This change causes the correct exception to be raised for invalid inputs. Change-Id: I92bfe862bbd99ce0b63bfc124e539fab3b175e0c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65311 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- src/python/gem5/isas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/gem5/isas.py b/src/python/gem5/isas.py index c904c9d227..84f02b87e7 100644 --- a/src/python/gem5/isas.py +++ b/src/python/gem5/isas.py @@ -81,7 +81,7 @@ def get_isa_from_str(input: str) -> ISA: return isa valid_isas_str_list = str() - for isa_str in get_isa_from_str(): + for isa_str in get_isas_str_set(): valid_isas_str_list += f"{os.linesep}{isa_str}" raise Exception( From dff879cf21ee609cca3662073cd89cb9322146be Mon Sep 17 00:00:00 2001 From: vramadas95 Date: Thu, 10 Nov 2022 20:42:25 -0600 Subject: [PATCH 006/492] configs, gpu-compute: Add configurable L1 scalar latencies Previously the scalar cache path used the same latency parameter as the vector cache path for memory requests. This commit adds new parameters for the scalar cache path latencies. This commit also modifies the model to use the new latency parameter to set the memory request latency in the scalar cache. 
The new parameters are '--scalar-mem-req-latency' and '--scalar-mem-resp-latency' and are set to default values of 50 and 0 respectively. Change-Id: I7483f780f2fc0cfbc320ed1fd0c2ee3e2dfc7af2 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65511 Reviewed-by: Matt Sinclair Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro Maintainer: Matt Sinclair --- configs/example/apu_se.py | 17 +++++++++++++++++ src/gpu-compute/GPU.py | 13 +++++++++++++ src/gpu-compute/compute_unit.cc | 6 +++++- src/gpu-compute/compute_unit.hh | 2 ++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index b33daa5b39..39def024fc 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -275,6 +275,21 @@ parser.add_argument( default=50, help="Latency for responses from ruby to the cu.", ) +parser.add_argument( + "--scalar-mem-req-latency", + type=int, + default=50, + help="Latency for scalar requests from the cu to ruby.", +) +parser.add_argument( + "--scalar-mem-resp-latency", + type=int, + # Set to 0 as the scalar cache response path does not model + # response latency yet and this parameter is currently not used + default=0, + help="Latency for scalar responses from ruby to the cu.", +) + parser.add_argument( "--TLB-prefetch", type=int, help="prefetch depth for" "TLBs" ) @@ -463,6 +478,8 @@ for i in range(n_cu): vrf_lm_bus_latency=args.vrf_lm_bus_latency, mem_req_latency=args.mem_req_latency, mem_resp_latency=args.mem_resp_latency, + scalar_mem_req_latency=args.scalar_mem_req_latency, + scalar_mem_resp_latency=args.scalar_mem_resp_latency, localDataStore=LdsState( banks=args.numLdsBanks, bankConflictPenalty=args.ldsBankConflictPenalty, diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py index 517d1801c0..0fdc0b75a7 100644 --- a/src/gpu-compute/GPU.py +++ b/src/gpu-compute/GPU.py @@ -178,6 +178,19 @@ class ComputeUnit(ClockedObject): "TCP and cu as well as TCP data 
array " "access. Specified in GPU clock cycles", ) + scalar_mem_req_latency = Param.Int( + 50, + "Latency for scalar requests from the cu to ruby. " + "Represents the pipeline to reach the TCP " + "and specified in GPU clock cycles", + ) + scalar_mem_resp_latency = Param.Int( + 50, + "Latency for scalar responses from ruby to the " + "cu. Represents the pipeline between the " + "TCP and cu as well as TCP data array " + "access. Specified in GPU clock cycles", + ) system = Param.System(Parent.any, "system object") cu_id = Param.Int("CU id") vrf_to_coalescer_bus_width = Param.Int( diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index 8498ea475e..62cfbf94cf 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -98,6 +98,10 @@ ComputeUnit::ComputeUnit(const Params &p) : ClockedObject(p), countPages(p.countPages), req_tick_latency(p.mem_req_latency * p.clk_domain->clockPeriod()), resp_tick_latency(p.mem_resp_latency * p.clk_domain->clockPeriod()), + scalar_req_tick_latency( + p.scalar_mem_req_latency * p.clk_domain->clockPeriod()), + scalar_resp_tick_latency( + p.scalar_mem_resp_latency * p.clk_domain->clockPeriod()), _requestorId(p.system->getRequestorId(this, "ComputeUnit")), lds(*p.localDataStore), gmTokenPort(name() + ".gmTokenPort", this), ldsPort(csprintf("%s-port", name()), this), @@ -1786,7 +1790,7 @@ ComputeUnit::ScalarDTLBPort::recvTimingResp(PacketPtr pkt) = new ComputeUnit::ScalarDataPort::MemReqEvent (computeUnit->scalarDataPort, req_pkt); computeUnit->schedule(scalar_mem_req_event, curTick() + - computeUnit->req_tick_latency); + computeUnit->scalar_req_tick_latency); return true; } diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh index a080e3dc1a..fcc4468ec1 100644 --- a/src/gpu-compute/compute_unit.hh +++ b/src/gpu-compute/compute_unit.hh @@ -354,6 +354,8 @@ class ComputeUnit : public ClockedObject Tick req_tick_latency; Tick resp_tick_latency; + Tick 
scalar_req_tick_latency; + Tick scalar_resp_tick_latency; /** * Number of WFs to schedule to each SIMD. This vector is populated From a49cba948049b7b8a3a30a586160c8198292ff51 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Thu, 10 Nov 2022 17:07:11 -0800 Subject: [PATCH 007/492] arch-x86: X86ISA default vector_string to HygonGenuine This commit, https://gem5-review.googlesource.com/c/public/gem5/+/64831, changed the default 'vendor_string' for the 'X86ISA' SimObject from 'M5 Simulator' to 'AuthenticAMD'. Unfortunately, due to an issue highlighted here: https://gem5.atlassian.net/browse/GEM5-1300 we cannot use 'AuthenticAMD'. Therefore, this change updates the default vendor_string to HygonGenuine. The HygonGenuine is simple but works. Change-Id: I21421da8ae73e76d9daaf2fdd0b3238d5b309172 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65492 Tested-by: kokoro Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce --- src/arch/x86/X86ISA.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/arch/x86/X86ISA.py b/src/arch/x86/X86ISA.py index 2760b7672d..bb72c415e9 100644 --- a/src/arch/x86/X86ISA.py +++ b/src/arch/x86/X86ISA.py @@ -42,6 +42,15 @@ class X86ISA(BaseISA): cxx_class = "gem5::X86ISA::ISA" cxx_header = "arch/x86/isa.hh" + # Here we set the default vector string to "HygonGenuine". Previously this + # "M5 Simulator" but due to stricter checks in newer versions of GLIBC, + # the CPUID is checked for the required features. As "M5 Simulator" is not + # genuine CPUID, an error is returned. This change + # https://gem5-review.googlesource.com/c/public/gem5/+/64831 changed this + # to "GenuineAMD" but due to issues with booting the Linux Kernel using + # this vector string (highlighted here: + # https://gem5.atlassian.net/browse/GEM5-1300) we opted to use + # "HygonGenuine" instead. 
vendor_string = Param.String( - "AuthenticAMD", "Vendor string for CPUID instruction" + "HygonGenuine", "Vendor string for CPUID instruction" ) From 6651329cc57862ef02ad48dcded762fd8ee43604 Mon Sep 17 00:00:00 2001 From: Quentin Forcioli Date: Thu, 25 Aug 2022 14:52:09 +0200 Subject: [PATCH 008/492] base: query now works the same way normal command worked Query can now return true or false like normal command, to interrupt execution, it might be needed if a query need to wait for another event. Change-Id: Ic463287ecd88e6b63a53f2cb9a46c83d3419618c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/63537 Reviewed-by: Bobby Bruce Tested-by: kokoro Maintainer: Bobby Bruce --- src/base/remote_gdb.cc | 29 +++++++++++++++++------------ src/base/remote_gdb.hh | 16 ++++++++-------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc index da3f113f38..c19dede748 100644 --- a/src/base/remote_gdb.cc +++ b/src/base/remote_gdb.cc @@ -1333,13 +1333,14 @@ std::map { "sThreadInfo", { &BaseRemoteGDB::querySThreadInfo } }, }; -void +bool BaseRemoteGDB::queryC(QuerySetCommand::Context &ctx) { send("QC%x", encodeThreadId(tc->contextId())); + return true; } -void +bool BaseRemoteGDB::querySupported(QuerySetCommand::Context &ctx) { std::ostringstream oss; @@ -1350,9 +1351,10 @@ BaseRemoteGDB::querySupported(QuerySetCommand::Context &ctx) for (const auto& feature : availableFeatures()) oss << ';' << feature; send(oss.str()); + return true; } -void +bool BaseRemoteGDB::queryXfer(QuerySetCommand::Context &ctx) { auto split = splitAt(ctx.args.at(0), ":"); @@ -1391,15 +1393,16 @@ BaseRemoteGDB::queryXfer(QuerySetCommand::Context &ctx) std::string encoded; encodeXferResponse(content, encoded, offset, length); send(encoded); + return true; } -void +bool BaseRemoteGDB::querySymbol(QuerySetCommand::Context &ctx) { //The target does not need to look up any (more) symbols. 
send("OK"); + return true; } - -void +bool BaseRemoteGDB::queryAttached(QuerySetCommand::Context &ctx) { std::string pid=""; @@ -1409,17 +1412,19 @@ BaseRemoteGDB::queryAttached(QuerySetCommand::Context &ctx) DPRINTF(GDBMisc, "QAttached : pid=%s\n",pid); //The remote server is attached to an existing process. send("1"); + return true; } -void +bool BaseRemoteGDB::queryFThreadInfo(QuerySetCommand::Context &ctx) { threadInfoIdx = 0; querySThreadInfo(ctx); + return true; } -void +bool BaseRemoteGDB::querySThreadInfo(QuerySetCommand::Context &ctx) { if (threadInfoIdx >= threads.size()) { @@ -1430,6 +1435,7 @@ BaseRemoteGDB::querySThreadInfo(QuerySetCommand::Context &ctx) std::advance(it, threadInfoIdx++); send("m%x", encodeThreadId(it->second->contextId())); } + return true; } bool @@ -1461,10 +1467,9 @@ BaseRemoteGDB::cmdQueryVar(GdbCommand::Context &ctx) remaining = std::move(arg_split.second); } } - - (this->*(query.func))(qctx); - - return true; + //returning true if the query want to pursue GDB command processing + //false means that the command processing stop until it's trigger again. 
+ return (this->*(query.func))(qctx); } std::vector diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh index ad64bc721c..4da1dcc0c8 100644 --- a/src/base/remote_gdb.hh +++ b/src/base/remote_gdb.hh @@ -416,7 +416,7 @@ class BaseRemoteGDB Context(const std::string &_name) : name(_name) {} }; - using Func = void (BaseRemoteGDB::*)(Context &ctx); + using Func = bool (BaseRemoteGDB::*)(Context &ctx); const char * const argSep; const Func func; @@ -428,15 +428,15 @@ class BaseRemoteGDB static std::map queryMap; - void queryC(QuerySetCommand::Context &ctx); - void querySupported(QuerySetCommand::Context &ctx); - void queryXfer(QuerySetCommand::Context &ctx); - void querySymbol(QuerySetCommand::Context &ctx); - void queryAttached(QuerySetCommand::Context &ctx); + bool queryC(QuerySetCommand::Context &ctx); + bool querySupported(QuerySetCommand::Context &ctx); + bool queryXfer(QuerySetCommand::Context &ctx); + bool querySymbol(QuerySetCommand::Context &ctx); + bool queryAttached(QuerySetCommand::Context &ctx); size_t threadInfoIdx = 0; - void queryFThreadInfo(QuerySetCommand::Context &ctx); - void querySThreadInfo(QuerySetCommand::Context &ctx); + bool queryFThreadInfo(QuerySetCommand::Context &ctx); + bool querySThreadInfo(QuerySetCommand::Context &ctx); protected: ThreadContext *context() { return tc; } From 33a36d35dea1ac9ed9e2b45d85ed78f6c5aae600 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Thu, 17 Nov 2022 08:54:43 -0800 Subject: [PATCH 009/492] dev-amdgpu: Store SDMA queue type, use for ring ID Currently the SDMA queue type is guessed in the trap method by looking at which queue in the engine is processing packets. It is possible for both queues to be processing (e.g., one queue sent a DMA and is waiting then switch to another queue), triggering an assert. Instead store the queue type in the queue itself and use that type in trap to determine which ring ID to use for the interrupt packet. 
Change-Id: If91c458e60a03f2013c0dc42bab0b1673e3dbd84 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65691 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/dev/amdgpu/sdma_engine.cc | 10 +++++----- src/dev/amdgpu/sdma_engine.hh | 5 ++++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc index 59c5027c85..02203c8178 100644 --- a/src/dev/amdgpu/sdma_engine.cc +++ b/src/dev/amdgpu/sdma_engine.cc @@ -55,11 +55,15 @@ SDMAEngine::SDMAEngine(const SDMAEngineParams &p) gfxIb.parent(&gfx); gfx.valid(true); gfxIb.valid(true); + gfx.queueType(SDMAGfx); + gfxIb.queueType(SDMAGfx); page.ib(&pageIb); pageIb.parent(&page); page.valid(true); pageIb.valid(true); + page.queueType(SDMAPage); + pageIb.queueType(SDMAPage); rlc0.ib(&rlc0Ib); rlc0Ib.parent(&rlc0); @@ -727,11 +731,7 @@ SDMAEngine::trap(SDMAQueue *q, sdmaTrap *pkt) DPRINTF(SDMAEngine, "Trap contextId: %p\n", pkt->intrContext); - uint32_t ring_id = 0; - assert(page.processing() ^ gfx.processing()); - if (page.processing()) { - ring_id = 3; - } + uint32_t ring_id = (q->queueType() == SDMAPage) ? 
3 : 0; gpuDevice->getIH()->prepareInterruptCookie(pkt->intrContext, ring_id, getIHClientId(), TRAP_ID); diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh index d0afaf7a4a..0bfee126c9 100644 --- a/src/dev/amdgpu/sdma_engine.hh +++ b/src/dev/amdgpu/sdma_engine.hh @@ -64,9 +64,10 @@ class SDMAEngine : public DmaVirtDevice bool _processing; SDMAQueue *_parent; SDMAQueue *_ib; + SDMAType _type; public: SDMAQueue() : _rptr(0), _wptr(0), _valid(false), _processing(false), - _parent(nullptr), _ib(nullptr) {} + _parent(nullptr), _ib(nullptr), _type(SDMAGfx) {} Addr base() { return _base; } Addr rptr() { return _base + _rptr; } @@ -80,6 +81,7 @@ class SDMAEngine : public DmaVirtDevice bool processing() { return _processing; } SDMAQueue* parent() { return _parent; } SDMAQueue* ib() { return _ib; } + SDMAType queueType() { return _type; } void base(Addr value) { _base = value; } @@ -111,6 +113,7 @@ class SDMAEngine : public DmaVirtDevice void processing(bool value) { _processing = value; } void parent(SDMAQueue* q) { _parent = q; } void ib(SDMAQueue* ib) { _ib = ib; } + void queueType(SDMAType type) { _type = type; } }; /* SDMA Engine ID */ From ec75787aef56665e893d70293bf3a0f93c33bb6a Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Thu, 17 Nov 2022 15:48:34 -0800 Subject: [PATCH 010/492] arch-arm: Revert 'Setup TC/ISA at construction time..' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverts: dd2f1fb2f8520849f10fc25fc5eab5beaa90a7d4 https://gem5-review.googlesource.com/c/public/gem5/+/65174 and 47bd56ee71ba1d684138365e7123aa779989ba1d https://gem5-review.googlesource.com/c/public/gem5/+/65291 The 47bd56ee change resulted in the `SuiteUID:tests/gem5/fs/linux/arm/test.py:realview-switcheroo-noncaching-timing-ALL-x86_64-opt` nightly test stalling. 
This behavior can be reproduced with: ``` ./build/ALL/gem5.opt tests/gem5/fs/linux/arm/run.py tests/gem5/configs/realview-switcheroo-noncaching-timing.py tests/gem5/resources/arm "$(pwd)" ``` The subsequent change, dd2f1fb2, must be reverted for this change to be reverted. Change-Id: I6fed74f33d013f321b93cf1a73eee404cb87ce18 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65732 Reviewed-by: Jason Lowe-Power Maintainer: Bobby Bruce Tested-by: kokoro --- src/arch/arm/isa.cc | 20 +++++++++++--------- src/dev/arm/gic_v3.cc | 2 +- src/dev/arm/gic_v3_cpu_interface.cc | 17 +++++++++++------ src/dev/arm/gic_v3_cpu_interface.hh | 9 +++++---- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index fd19f721b2..a30fd94596 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -523,6 +523,16 @@ ISA::setupThreadContext() return; selfDebug->init(tc); + + Gicv3 *gicv3 = dynamic_cast(system->getGIC()); + if (!gicv3) + return; + + if (!gicv3CpuInterface) + gicv3CpuInterface.reset(gicv3->getCPUInterface(tc->contextId())); + + gicv3CpuInterface->setISA(this); + gicv3CpuInterface->setThreadContext(tc); } void @@ -1998,15 +2008,7 @@ ISA::getGenericTimer() BaseISADevice & ISA::getGICv3CPUInterface() { - if (gicv3CpuInterface) - return *gicv3CpuInterface.get(); - - assert(system); - Gicv3 *gicv3 = dynamic_cast(system->getGIC()); - panic_if(!gicv3, "The system does not have a GICv3 irq controller\n"); - - gicv3CpuInterface.reset(gicv3->getCPUInterface(tc->contextId())); - + panic_if(!gicv3CpuInterface, "GICV3 cpu interface is not registered!"); return *gicv3CpuInterface.get(); } diff --git a/src/dev/arm/gic_v3.cc b/src/dev/arm/gic_v3.cc index e14d1f2bef..dde3818b07 100644 --- a/src/dev/arm/gic_v3.cc +++ b/src/dev/arm/gic_v3.cc @@ -147,7 +147,7 @@ Gicv3::init() for (int i = 0; i < threads; i++) { redistributors[i] = new Gicv3Redistributor(this, i); - cpuInterfaces[i] = new Gicv3CPUInterface(this, 
sys->threads[i]); + cpuInterfaces[i] = new Gicv3CPUInterface(this, i); } distRange = RangeSize(params().dist_addr, diff --git a/src/dev/arm/gic_v3_cpu_interface.cc b/src/dev/arm/gic_v3_cpu_interface.cc index a11dd9b8ed..0e1dbaa04b 100644 --- a/src/dev/arm/gic_v3_cpu_interface.cc +++ b/src/dev/arm/gic_v3_cpu_interface.cc @@ -55,19 +55,15 @@ using namespace ArmISA; const uint8_t Gicv3CPUInterface::GIC_MIN_BPR; const uint8_t Gicv3CPUInterface::GIC_MIN_BPR_NS; -Gicv3CPUInterface::Gicv3CPUInterface(Gicv3 * gic, ThreadContext *_tc) +Gicv3CPUInterface::Gicv3CPUInterface(Gicv3 * gic, uint32_t cpu_id) : BaseISADevice(), gic(gic), redistributor(nullptr), distributor(nullptr), - tc(_tc), - maintenanceInterrupt(gic->params().maint_int->get(tc)), - cpuId(tc->contextId()) + cpuId(cpu_id) { hppi.prio = 0xff; hppi.intid = Gicv3::INTID_SPURIOUS; - - setISA(static_cast(tc->getIsaPtr())); } void @@ -84,6 +80,15 @@ Gicv3CPUInterface::resetHppi(uint32_t intid) hppi.prio = 0xff; } +void +Gicv3CPUInterface::setThreadContext(ThreadContext *_tc) +{ + tc = _tc; + maintenanceInterrupt = gic->params().maint_int->get(tc); + fatal_if(maintenanceInterrupt->num() >= redistributor->irqPending.size(), + "Invalid maintenance interrupt number\n"); +} + bool Gicv3CPUInterface::getHCREL2FMO() const { diff --git a/src/dev/arm/gic_v3_cpu_interface.hh b/src/dev/arm/gic_v3_cpu_interface.hh index c39fab7647..e860373fb5 100644 --- a/src/dev/arm/gic_v3_cpu_interface.hh +++ b/src/dev/arm/gic_v3_cpu_interface.hh @@ -68,11 +68,11 @@ class Gicv3CPUInterface : public ArmISA::BaseISADevice, public Serializable Gicv3 * gic; Gicv3Redistributor * redistributor; Gicv3Distributor * distributor; - - ThreadContext *tc; - ArmInterruptPin *maintenanceInterrupt; uint32_t cpuId; + ArmInterruptPin *maintenanceInterrupt; + ThreadContext *tc; + BitUnion64(ICC_CTLR_EL1) Bitfield<63, 20> res0_3; Bitfield<19> ExtRange; @@ -359,7 +359,7 @@ class Gicv3CPUInterface : public ArmISA::BaseISADevice, public Serializable void 
setBankedMiscReg(ArmISA::MiscRegIndex misc_reg, RegVal val) const; public: - Gicv3CPUInterface(Gicv3 * gic, ThreadContext *tc); + Gicv3CPUInterface(Gicv3 * gic, uint32_t cpu_id); void init(); @@ -369,6 +369,7 @@ class Gicv3CPUInterface : public ArmISA::BaseISADevice, public Serializable public: // BaseISADevice RegVal readMiscReg(int misc_reg) override; void setMiscReg(int misc_reg, RegVal val) override; + void setThreadContext(ThreadContext *tc) override; }; } // namespace gem5 From 5eb73551bda1ecdc632cf50f27eb45ff2dbf1bfa Mon Sep 17 00:00:00 2001 From: Yu-hsin Wang Date: Mon, 14 Nov 2022 16:11:57 +0800 Subject: [PATCH 011/492] fastmodel: CortexR52 export standbywfi signal Change-Id: Ic9ed9a3e35f068e151725d36e7fff391013ff5d1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65534 Reviewed-by: Gabe Black Tested-by: kokoro Maintainer: Gabe Black --- .../fastmodel/CortexR52/FastModelCortexR52.py | 5 ++- .../arm/fastmodel/CortexR52/cortex_r52.cc | 4 ++ src/arch/arm/fastmodel/CortexR52/evs.cc | 10 +++++ src/arch/arm/fastmodel/CortexR52/evs.hh | 1 + src/arch/arm/fastmodel/CortexR52/x1/x1.lisa | 4 ++ src/arch/arm/fastmodel/CortexR52/x2/x2.lisa | 4 ++ src/arch/arm/fastmodel/CortexR52/x3/x3.lisa | 4 ++ src/arch/arm/fastmodel/CortexR52/x4/x4.lisa | 4 ++ .../arm/fastmodel/common/signal_receiver.hh | 37 +++++++++++++++++++ 9 files changed, 72 insertions(+), 1 deletion(-) diff --git a/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py b/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py index 4970ae2ed4..1e267f028f 100644 --- a/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py +++ b/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py @@ -31,7 +31,7 @@ from m5.objects.ArmInterrupts import ArmInterrupts from m5.objects.ArmISA import ArmISA from m5.objects.FastModel import AmbaInitiatorSocket, AmbaTargetSocket from m5.objects.ResetPort import ResetResponsePort -from m5.objects.IntPin import IntSinkPin, VectorIntSinkPin +from m5.objects.IntPin 
import IntSourcePin, IntSinkPin, VectorIntSinkPin from m5.objects.Iris import IrisBaseCPU from m5.objects.SystemC import SystemC_ScModule @@ -56,6 +56,9 @@ class FastModelCortexR52(IrisBaseCPU): "processor logic, including debug logic." ) halt = IntSinkPin("Raising this signal will put the core into halt mode.") + standbywfi = IntSourcePin( + "This signal indicates if a core is in WFI state." + ) CFGEND = Param.Bool( False, diff --git a/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc b/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc index be83082d16..9dfe7a5158 100644 --- a/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc +++ b/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc @@ -92,6 +92,10 @@ CortexR52::getPort(const std::string &if_name, PortID idx) // Since PPIs are indexed both by core and by number, modify the name // to hold the core number. return evs->gem5_getPort(csprintf("%s_%d", if_name, num), idx); + } else if (if_name == "standbywfi") { + // Since standbywfi is indexed by fanout, modify the name to hold the + // core number. 
+ return evs->gem5_getPort(csprintf("%s_%d", if_name, num), idx); } else if (if_name == "amba" || if_name == "llpp" || if_name == "flash" || if_name == "core_reset" || if_name == "poweron_reset" || if_name == "halt") { diff --git a/src/arch/arm/fastmodel/CortexR52/evs.cc b/src/arch/arm/fastmodel/CortexR52/evs.cc index 6887c6ced9..734323e026 100644 --- a/src/arch/arm/fastmodel/CortexR52/evs.cc +++ b/src/arch/arm/fastmodel/CortexR52/evs.cc @@ -79,6 +79,7 @@ ScxEvsCortexR52::CorePins::CorePins(Evs *_evs, int _cpu) : core_reset(name + ".core_reset", 0), poweron_reset(name + ".poweron_reset", 0), halt(name + ".halt", 0), + standbywfi(name + ".standbywfi"), cfgvectable((name + "cfgvectable").c_str()) { for (int i = 0; i < Evs::PpiCount; i++) { @@ -88,6 +89,7 @@ ScxEvsCortexR52::CorePins::CorePins(Evs *_evs, int _cpu) : core_reset.signal_out.bind(evs->core_reset[cpu]); poweron_reset.signal_out.bind(evs->poweron_reset[cpu]); halt.signal_out.bind(evs->halt[cpu]); + evs->standbywfi[cpu].bind(standbywfi.signal_in); cfgvectable.bind(evs->cfgvectable[cpu]); } @@ -161,6 +163,14 @@ ScxEvsCortexR52::gem5_getPort(const std::string &if_name, int idx) panic("Couldn't find CPU number in %s.", if_name); } return *this->corePins.at(cpu)->ppis.at(idx); + } else if (if_name.substr(0, 10) == "standbywfi") { + int cpu; + try { + cpu = std::stoi(if_name.substr(11)); + } catch (const std::invalid_argument &a) { + panic("Couldn't find CPU number in %s.", if_name); + } + return this->corePins.at(cpu)->standbywfi.getSignalOut(idx); } else { return Base::gem5_getPort(if_name, idx); } diff --git a/src/arch/arm/fastmodel/CortexR52/evs.hh b/src/arch/arm/fastmodel/CortexR52/evs.hh index 535d678c34..02ef1ae257 100644 --- a/src/arch/arm/fastmodel/CortexR52/evs.hh +++ b/src/arch/arm/fastmodel/CortexR52/evs.hh @@ -110,6 +110,7 @@ class ScxEvsCortexR52 : public Types::Base, public Iris::BaseCpuEvs SignalSender core_reset; SignalSender poweron_reset; SignalSender halt; + SignalReceiverInt standbywfi; 
SignalInitiator cfgvectable; }; diff --git a/src/arch/arm/fastmodel/CortexR52/x1/x1.lisa b/src/arch/arm/fastmodel/CortexR52/x1/x1.lisa index 2a7299d77e..2738ba23f9 100644 --- a/src/arch/arm/fastmodel/CortexR52/x1/x1.lisa +++ b/src/arch/arm/fastmodel/CortexR52/x1/x1.lisa @@ -53,6 +53,9 @@ component CortexR52x1 self.dbg_reset => core.presetdbg; self.halt => core.cpuhalt; + // Status signals. + core.standbywfi => self.standbywfi; + // Clocks. clock1Hz.clk_out => clockDiv.clk_in; clock1Hz.clk_out => clockDivPeriph.clk_in; @@ -79,6 +82,7 @@ component CortexR52x1 slave port core_reset[1]; slave port poweron_reset[1]; slave port halt[1]; + master port standbywfi[1]; slave port top_reset; slave port dbg_reset; slave port cfgvectable[1]; diff --git a/src/arch/arm/fastmodel/CortexR52/x2/x2.lisa b/src/arch/arm/fastmodel/CortexR52/x2/x2.lisa index 9100a5bcc2..485ffee983 100644 --- a/src/arch/arm/fastmodel/CortexR52/x2/x2.lisa +++ b/src/arch/arm/fastmodel/CortexR52/x2/x2.lisa @@ -53,6 +53,9 @@ component CortexR52x2 self.dbg_reset => core.presetdbg; self.halt => core.cpuhalt; + // Status signals. + core.standbywfi => self.standbywfi; + // Clocks. clock1Hz.clk_out => clockDiv.clk_in; clock1Hz.clk_out => clockDivPeriph.clk_in; @@ -80,6 +83,7 @@ component CortexR52x2 slave port core_reset[2]; slave port poweron_reset[2]; slave port halt[2]; + master port standbywfi[2]; slave port top_reset; slave port dbg_reset; slave port cfgvectable[2]; diff --git a/src/arch/arm/fastmodel/CortexR52/x3/x3.lisa b/src/arch/arm/fastmodel/CortexR52/x3/x3.lisa index bb8d153f44..1e526d9958 100644 --- a/src/arch/arm/fastmodel/CortexR52/x3/x3.lisa +++ b/src/arch/arm/fastmodel/CortexR52/x3/x3.lisa @@ -53,6 +53,9 @@ component CortexR52x3 self.dbg_reset => core.presetdbg; self.halt => core.cpuhalt; + // Status signals. + core.standbywfi => self.standbywfi; + // Clocks. 
clock1Hz.clk_out => clockDiv.clk_in; clock1Hz.clk_out => clockDivPeriph.clk_in; @@ -81,6 +84,7 @@ component CortexR52x3 slave port core_reset[3]; slave port poweron_reset[3]; slave port halt[3]; + master port standbywfi[3]; slave port top_reset; slave port dbg_reset; slave port cfgvectable[3]; diff --git a/src/arch/arm/fastmodel/CortexR52/x4/x4.lisa b/src/arch/arm/fastmodel/CortexR52/x4/x4.lisa index 5b278ddb41..df23bf17b4 100644 --- a/src/arch/arm/fastmodel/CortexR52/x4/x4.lisa +++ b/src/arch/arm/fastmodel/CortexR52/x4/x4.lisa @@ -53,6 +53,9 @@ component CortexR52x4 self.dbg_reset => core.presetdbg; self.halt => core.cpuhalt; + // Status signals. + core.standbywfi => self.standbywfi; + // Clocks. clock1Hz.clk_out => clockDiv.clk_in; clock1Hz.clk_out => clockDivPeriph.clk_in; @@ -82,6 +85,7 @@ component CortexR52x4 slave port core_reset[4]; slave port poweron_reset[4]; slave port halt[4]; + master port standbywfi[4]; slave port top_reset; slave port dbg_reset; slave port cfgvectable[4]; diff --git a/src/arch/arm/fastmodel/common/signal_receiver.hh b/src/arch/arm/fastmodel/common/signal_receiver.hh index 0025e39173..990787743b 100644 --- a/src/arch/arm/fastmodel/common/signal_receiver.hh +++ b/src/arch/arm/fastmodel/common/signal_receiver.hh @@ -34,8 +34,12 @@ #pragma GCC diagnostic pop #include +#include #include "base/compiler.hh" +#include "base/cprintf.hh" +#include "base/types.hh" +#include "dev/intpin.hh" namespace gem5 { @@ -80,6 +84,39 @@ class SignalReceiver : public amba_pv::signal_slave_base } }; +class SignalReceiverInt : public SignalReceiver +{ + public: + using IntPin = IntSourcePin; + + explicit SignalReceiverInt(const std::string &name) + : SignalReceiver(name) + { + onChange([this](bool status) { + for (auto &signal : signalOut) { + if (signal && signal->isConnected()) + status ? 
signal->raise() : signal->lower(); + } + }); + } + + IntPin & + getSignalOut(int idx) + { + if (signalOut.size() <= idx) { + signalOut.resize(idx + 1); + } + if (!signalOut[idx]) { + signalOut[idx] = std::make_unique( + csprintf("%s.signalOut[%d]", get_name(), idx), idx, this); + } + return *signalOut[idx]; + } + + private: + std::vector> signalOut; +}; + } // namespace fastmodel } // namespace gem5 From ff16ca3dafcb2228843502d20a5288fdd64a9538 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 27 Sep 2022 02:52:42 -0700 Subject: [PATCH 012/492] mem: Add a class to describe a back door request. In cases where a back door is not being requested alongside a packet or request, there needs to be a structure which describes the address range to use, and what type of access the back door should support. It would be possible to make a Packet/Request to carry that information, but those types are actually pretty big, and have a lot of extra overhead which would be overkill for this purpose. Change-Id: I3638361ffa758ee959cb3bc57f7c35f2aa34a36c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65751 Reviewed-by: Jason Lowe-Power Maintainer: Gabe Black Tested-by: kokoro --- src/mem/backdoor.hh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/mem/backdoor.hh b/src/mem/backdoor.hh index 73e667017d..54fe4acbd1 100644 --- a/src/mem/backdoor.hh +++ b/src/mem/backdoor.hh @@ -126,6 +126,25 @@ class MemBackdoor typedef MemBackdoor *MemBackdoorPtr; +class MemBackdoorReq +{ + private: + AddrRange _range; + MemBackdoor::Flags _flags; + + public: + MemBackdoorReq(AddrRange r, MemBackdoor::Flags new_flags) : + _range(r), _flags(new_flags) + {} + + const AddrRange &range() const { return _range; } + + bool readable() const { return _flags & MemBackdoor::Readable; } + bool writeable() const { return _flags & MemBackdoor::Writeable; } + + MemBackdoor::Flags flags() const { return _flags; } +}; + } // namespace gem5 #endif //__MEM_BACKDOOR_HH__ From 
842a3a935fe0773ae204d0b5eb2b3eac0995b6ed Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 27 Sep 2022 02:56:10 -0700 Subject: [PATCH 013/492] mem: Add an API for requesting a back door without a Packet/Request. Make this part of the Functional protocol, since it should always return immediately, can be shared by the atomic and timing protocols, and thematically fits with that protocol. The default implementation on the receiving end just ignores the request and leaves the back door pointer set to null, effectively making back doors default "off" which matches their behavior in the atomic protocol. This mechanism helps fix a bug in the TLM gem5 bridges which need to translate to/from the DMI and back door mechanisms, where there can be an explicit request for a back door which does not have a transaction associated with it. It is also necessary for bridging DMI requests in timing mode, since the DMI requests must be instant, and the timing protocol does not send/receive packets instantly. Change-Id: I905f13b9bc83c3fa7877b05ce932e17c308125e2 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65752 Tested-by: kokoro Reviewed-by: Jason Lowe-Power Maintainer: Gabe Black --- src/mem/port.cc | 16 ++++++++++++++++ src/mem/port.hh | 29 +++++++++++++++++++++++++++++ src/mem/protocol/functional.cc | 8 ++++++++ src/mem/protocol/functional.hh | 21 +++++++++++++++++++++ 4 files changed, 74 insertions(+) diff --git a/src/mem/port.cc b/src/mem/port.cc index 00f7ce6efa..18793d487b 100644 --- a/src/mem/port.cc +++ b/src/mem/port.cc @@ -102,6 +102,11 @@ class DefaultResponsePort : public ResponsePort // Functional protocol. void recvFunctional(PacketPtr) override { blowUp(); } + void + recvMemBackdoorReq(const MemBackdoorReq &, MemBackdoorPtr &) override + { + blowUp(); + } // General. 
AddrRangeList getAddrRanges() const override { return AddrRangeList(); } @@ -205,4 +210,15 @@ ResponsePort::recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor) return recvAtomic(pkt); } +void +ResponsePort::recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) +{ + if (!defaultBackdoorWarned) { + DPRINTF(ResponsePort, + "Port %s doesn't support requesting a back door.", name()); + defaultBackdoorWarned = true; + } +} + } // namespace gem5 diff --git a/src/mem/port.hh b/src/mem/port.hh index 33ff117cf2..fb0f4b8812 100644 --- a/src/mem/port.hh +++ b/src/mem/port.hh @@ -161,6 +161,21 @@ class RequestPort: public Port, public AtomicRequestProtocol, */ void sendFunctional(PacketPtr pkt) const; + /** + * Send a request for a back door to a range of memory. + * + * @param req An object which describes what back door is being requested. + * @param backdoor Can be set to a back door pointer by the target to let + * caller have direct access to the requested range. The original + * caller should initialize this pointer to nullptr. If a receiver + * does not want to provide a back door, they should leave this + * value. If an intermediary wants to support a back door across it, + * it should pass this pointer through, or if not, return without + * passing the request further downstream. + */ + void sendMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor); + public: /* The timing protocol. */ @@ -438,6 +453,8 @@ class ResponsePort : public Port, public AtomicResponseProtocol, * Default implementations. 
*/ Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor) override; + void recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) override; bool tryTiming(PacketPtr pkt) override @@ -491,6 +508,18 @@ RequestPort::sendFunctional(PacketPtr pkt) const } } +inline void +RequestPort::sendMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) +{ + try { + return FunctionalRequestProtocol::sendMemBackdoorReq( + _responsePort, req, backdoor); + } catch (UnboundPortException) { + reportUnbound(); + } +} + inline bool RequestPort::sendTimingReq(PacketPtr pkt) { diff --git a/src/mem/protocol/functional.cc b/src/mem/protocol/functional.cc index 0f54d92a76..29cec23bc3 100644 --- a/src/mem/protocol/functional.cc +++ b/src/mem/protocol/functional.cc @@ -53,6 +53,14 @@ FunctionalRequestProtocol::send( return peer->recvFunctional(pkt); } +void +FunctionalRequestProtocol::sendMemBackdoorReq( + FunctionalResponseProtocol *peer, + const MemBackdoorReq &req, MemBackdoorPtr &backdoor) +{ + return peer->recvMemBackdoorReq(req, backdoor); +} + /* The response protocol. */ void diff --git a/src/mem/protocol/functional.hh b/src/mem/protocol/functional.hh index 27db171b2d..4f330b4788 100644 --- a/src/mem/protocol/functional.hh +++ b/src/mem/protocol/functional.hh @@ -41,6 +41,7 @@ #ifndef __MEM_GEM5_PROTOCOL_FUNCTIONAL_HH__ #define __MEM_GEM5_PROTOCOL_FUNCTIONAL_HH__ +#include "mem/backdoor.hh" #include "mem/packet.hh" namespace gem5 @@ -66,6 +67,16 @@ class FunctionalRequestProtocol * Receive a functional snoop request packet from the peer. */ virtual void recvFunctionalSnoop(PacketPtr pkt) = 0; + + /** + * Send a request for a back door to a range of memory. + * + * @param req An object which describes what back door is being requested. + * @param backdoor Can be set to a back door pointer by the target to let + * caller have direct access to the requested range. 
+ */ + void sendMemBackdoorReq(FunctionalResponseProtocol *peer, + const MemBackdoorReq &req, MemBackdoorPtr &backdoor); }; class FunctionalResponseProtocol @@ -86,6 +97,16 @@ class FunctionalResponseProtocol * Receive a functional request packet from the peer. */ virtual void recvFunctional(PacketPtr pkt) = 0; + + /** + * Receive a request for a back door to a range of memory. + * + * @param req An object which describes what back door is being requested. + * @param backdoor Can be set to a back door pointer by the target to let + * caller have direct access to the requested range. + */ + virtual void recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) = 0; }; } // namespace gem5 From d7b3020324782bd0382ff800fb27b165cd3c65e3 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sat, 1 Oct 2022 03:18:20 -0700 Subject: [PATCH 014/492] dev,mem,systemc: Implement and use the recvMemBackdoorReq func. Change-Id: If6e12d4fcef0c31131a9768099a72542a8f62ab1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65753 Tested-by: kokoro Reviewed-by: Jason Lowe-Power Maintainer: Gabe Black Reviewed-by: Jui-min Lee --- src/mem/cfi_mem.cc | 15 ++++++++++++ src/mem/cfi_mem.hh | 4 +++ src/mem/coherent_xbar.cc | 8 ++++++ src/mem/coherent_xbar.hh | 12 +++++++++ src/mem/mem_ctrl.cc | 18 ++++++++++++++ src/mem/mem_ctrl.hh | 4 +++ src/mem/noncoherent_xbar.cc | 8 ++++++ src/mem/noncoherent_xbar.hh | 9 +++++++ src/mem/simple_mem.cc | 14 +++++++++++ src/mem/simple_mem.hh | 4 +++ src/mem/sys_bridge.hh | 7 ++++++ src/systemc/tlm_bridge/gem5_to_tlm.cc | 25 +++++++++++++++++++ src/systemc/tlm_bridge/gem5_to_tlm.hh | 9 +++++++ src/systemc/tlm_bridge/tlm_to_gem5.cc | 35 +++++++++++++++------------ 14 files changed, 156 insertions(+), 16 deletions(-) diff --git a/src/mem/cfi_mem.cc b/src/mem/cfi_mem.cc index 70dc43fca8..f8c1084700 100644 --- a/src/mem/cfi_mem.cc +++ b/src/mem/cfi_mem.cc @@ -275,6 +275,14 @@ CfiMemory::recvFunctional(PacketPtr pkt) pkt->popLabel(); } 
+void +CfiMemory::recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &_backdoor) +{ + if (backdoor.ptr()) + _backdoor = &backdoor; +} + bool CfiMemory::recvTimingReq(PacketPtr pkt) { @@ -486,6 +494,13 @@ CfiMemory::MemoryPort::recvFunctional(PacketPtr pkt) mem.recvFunctional(pkt); } +void +CfiMemory::MemoryPort::recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &_backdoor) +{ + mem.recvMemBackdoorReq(req, _backdoor); +} + bool CfiMemory::MemoryPort::recvTimingReq(PacketPtr pkt) { diff --git a/src/mem/cfi_mem.hh b/src/mem/cfi_mem.hh index 5a7a1c57aa..4a0226736a 100644 --- a/src/mem/cfi_mem.hh +++ b/src/mem/cfi_mem.hh @@ -248,6 +248,8 @@ class CfiMemory : public AbstractMemory Tick recvAtomicBackdoor( PacketPtr pkt, MemBackdoorPtr &_backdoor) override; void recvFunctional(PacketPtr pkt) override; + void recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &_backdoor) override; bool recvTimingReq(PacketPtr pkt) override; void recvRespRetry() override; AddrRangeList getAddrRanges() const override; @@ -361,6 +363,8 @@ class CfiMemory : public AbstractMemory Tick recvAtomic(PacketPtr pkt); Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &_backdoor); void recvFunctional(PacketPtr pkt); + void recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &_backdoor); bool recvTimingReq(PacketPtr pkt); void recvRespRetry(); diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc index 7d1cd5d57d..8163299a09 100644 --- a/src/mem/coherent_xbar.cc +++ b/src/mem/coherent_xbar.cc @@ -997,6 +997,14 @@ CoherentXBar::forwardAtomic(PacketPtr pkt, PortID exclude_cpu_side_port_id, return std::make_pair(snoop_response_cmd, snoop_response_latency); } +void +CoherentXBar::recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) +{ + PortID dest_id = findPort(req.range()); + memSidePorts[dest_id]->sendMemBackdoorReq(req, backdoor); +} + void CoherentXBar::recvFunctional(PacketPtr pkt, PortID cpu_side_port_id) { diff --git 
a/src/mem/coherent_xbar.hh b/src/mem/coherent_xbar.hh index 1c55cc00c8..9693d9225e 100644 --- a/src/mem/coherent_xbar.hh +++ b/src/mem/coherent_xbar.hh @@ -136,6 +136,13 @@ class CoherentXBar : public BaseXBar xbar.recvFunctional(pkt, id); } + void + recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) override + { + xbar.recvMemBackdoorReq(req, backdoor); + } + AddrRangeList getAddrRanges() const override { @@ -374,6 +381,11 @@ class CoherentXBar : public BaseXBar transaction.*/ void recvFunctional(PacketPtr pkt, PortID cpu_side_port_id); + /** Function called by the port when the crossbar receives a request for + a memory backdoor.*/ + void recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor); + /** Function called by the port when the crossbar is receiving a functional snoop transaction.*/ void recvFunctionalSnoop(PacketPtr pkt, PortID mem_side_port_id); diff --git a/src/mem/mem_ctrl.cc b/src/mem/mem_ctrl.cc index c65d68a5a7..beaace1cbf 100644 --- a/src/mem/mem_ctrl.cc +++ b/src/mem/mem_ctrl.cc @@ -1364,6 +1364,17 @@ MemCtrl::recvFunctional(PacketPtr pkt) pkt->print()); } +void +MemCtrl::recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) +{ + panic_if(!dram->getAddrRange().contains(req.range().start()), + "Can't handle address range for backdoor %s.", + req.range().to_string()); + + dram->getBackdoor(backdoor); +} + bool MemCtrl::recvFunctionalLogic(PacketPtr pkt, MemInterface* mem_intr) { @@ -1474,6 +1485,13 @@ MemCtrl::MemoryPort::recvFunctional(PacketPtr pkt) pkt->popLabel(); } +void +MemCtrl::MemoryPort::recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) +{ + ctrl.recvMemBackdoorReq(req, backdoor); +} + Tick MemCtrl::MemoryPort::recvAtomic(PacketPtr pkt) { diff --git a/src/mem/mem_ctrl.hh b/src/mem/mem_ctrl.hh index fe5d478280..2819fb4caa 100644 --- a/src/mem/mem_ctrl.hh +++ b/src/mem/mem_ctrl.hh @@ -267,6 +267,8 @@ class MemCtrl : public qos::MemCtrl PacketPtr pkt, 
MemBackdoorPtr &backdoor) override; void recvFunctional(PacketPtr pkt) override; + void recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) override; bool recvTimingReq(PacketPtr) override; @@ -784,6 +786,8 @@ class MemCtrl : public qos::MemCtrl virtual Tick recvAtomic(PacketPtr pkt); virtual Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor); virtual void recvFunctional(PacketPtr pkt); + virtual void recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor); virtual bool recvTimingReq(PacketPtr pkt); bool recvFunctionalLogic(PacketPtr pkt, MemInterface* mem_intr); diff --git a/src/mem/noncoherent_xbar.cc b/src/mem/noncoherent_xbar.cc index 67efdba84a..0a378e2c63 100644 --- a/src/mem/noncoherent_xbar.cc +++ b/src/mem/noncoherent_xbar.cc @@ -284,6 +284,14 @@ NoncoherentXBar::recvAtomicBackdoor(PacketPtr pkt, PortID cpu_side_port_id, return response_latency; } +void +NoncoherentXBar::recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) +{ + PortID dest_id = findPort(req.range()); + memSidePorts[dest_id]->sendMemBackdoorReq(req, backdoor); +} + void NoncoherentXBar::recvFunctional(PacketPtr pkt, PortID cpu_side_port_id) { diff --git a/src/mem/noncoherent_xbar.hh b/src/mem/noncoherent_xbar.hh index ab833148b5..03f751b77d 100644 --- a/src/mem/noncoherent_xbar.hh +++ b/src/mem/noncoherent_xbar.hh @@ -126,6 +126,13 @@ class NoncoherentXBar : public BaseXBar xbar.recvFunctional(pkt, id); } + void + recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) override + { + xbar.recvMemBackdoorReq(req, backdoor); + } + AddrRangeList getAddrRanges() const override { @@ -179,6 +186,8 @@ class NoncoherentXBar : public BaseXBar Tick recvAtomicBackdoor(PacketPtr pkt, PortID cpu_side_port_id, MemBackdoorPtr *backdoor=nullptr); void recvFunctional(PacketPtr pkt, PortID cpu_side_port_id); + void recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor); public: diff --git 
a/src/mem/simple_mem.cc b/src/mem/simple_mem.cc index ced3a38cf4..27fcac1183 100644 --- a/src/mem/simple_mem.cc +++ b/src/mem/simple_mem.cc @@ -108,6 +108,13 @@ SimpleMemory::recvFunctional(PacketPtr pkt) pkt->popLabel(); } +void +SimpleMemory::recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &_backdoor) +{ + getBackdoor(_backdoor); +} + bool SimpleMemory::recvTimingReq(PacketPtr pkt) { @@ -294,6 +301,13 @@ SimpleMemory::MemoryPort::recvFunctional(PacketPtr pkt) mem.recvFunctional(pkt); } +void +SimpleMemory::MemoryPort::recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) +{ + mem.recvMemBackdoorReq(req, backdoor); +} + bool SimpleMemory::MemoryPort::recvTimingReq(PacketPtr pkt) { diff --git a/src/mem/simple_mem.hh b/src/mem/simple_mem.hh index fc6d6849d5..75a03fbe0e 100644 --- a/src/mem/simple_mem.hh +++ b/src/mem/simple_mem.hh @@ -98,6 +98,8 @@ class SimpleMemory : public AbstractMemory Tick recvAtomicBackdoor( PacketPtr pkt, MemBackdoorPtr &_backdoor) override; void recvFunctional(PacketPtr pkt) override; + void recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) override; bool recvTimingReq(PacketPtr pkt) override; void recvRespRetry() override; AddrRangeList getAddrRanges() const override; @@ -191,6 +193,8 @@ class SimpleMemory : public AbstractMemory Tick recvAtomic(PacketPtr pkt); Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &_backdoor); void recvFunctional(PacketPtr pkt); + void recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor); bool recvTimingReq(PacketPtr pkt); void recvRespRetry(); }; diff --git a/src/mem/sys_bridge.hh b/src/mem/sys_bridge.hh index 8fa3131f25..15a3fc8270 100644 --- a/src/mem/sys_bridge.hh +++ b/src/mem/sys_bridge.hh @@ -331,6 +331,13 @@ class SysBridge : public SimObject pkt->requestorId()); } + void + recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) override + { + targetPort->sendMemBackdoorReq(req, backdoor); + } + 
AddrRangeList getAddrRanges() const override { diff --git a/src/systemc/tlm_bridge/gem5_to_tlm.cc b/src/systemc/tlm_bridge/gem5_to_tlm.cc index 10f7d1a9c7..a5eb9df27e 100644 --- a/src/systemc/tlm_bridge/gem5_to_tlm.cc +++ b/src/systemc/tlm_bridge/gem5_to_tlm.cc @@ -509,6 +509,31 @@ Gem5ToTlmBridge::recvFunctional(PacketPtr packet) trans->release(); } +template +void +Gem5ToTlmBridge::recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) +{ + // Create a transaction to send along to TLM's get_direct_mem_ptr. + tlm::tlm_generic_payload *trans = mm.allocate(); + trans->acquire(); + trans->set_address(req.range().start()); + trans->set_data_length(req.range().size()); + trans->set_streaming_width(req.range().size()); + trans->set_data_ptr(nullptr); + + if (req.writeable()) + trans->set_command(tlm::TLM_WRITE_COMMAND); + else if (req.readable()) + trans->set_command(tlm::TLM_READ_COMMAND); + else + trans->set_command(tlm::TLM_IGNORE_COMMAND); + + backdoor = getBackdoor(*trans); + + trans->release(); +} + template tlm::tlm_sync_enum Gem5ToTlmBridge::nb_transport_bw(tlm::tlm_generic_payload &trans, diff --git a/src/systemc/tlm_bridge/gem5_to_tlm.hh b/src/systemc/tlm_bridge/gem5_to_tlm.hh index 0cb925ee55..23415b843b 100644 --- a/src/systemc/tlm_bridge/gem5_to_tlm.hh +++ b/src/systemc/tlm_bridge/gem5_to_tlm.hh @@ -62,6 +62,7 @@ #include #include +#include "mem/backdoor.hh" #include "mem/port.hh" #include "params/Gem5ToTlmBridgeBase.hh" #include "sim/system.hh" @@ -117,6 +118,12 @@ class Gem5ToTlmBridge : public Gem5ToTlmBridgeBase { return bridge.recvFunctional(pkt); } + void + recvMemBackdoorReq(const gem5::MemBackdoorReq &req, + gem5::MemBackdoorPtr &backdoor) override + { + bridge.recvMemBackdoorReq(req, backdoor); + } bool recvTimingReq(gem5::PacketPtr pkt) override { @@ -179,6 +186,8 @@ class Gem5ToTlmBridge : public Gem5ToTlmBridgeBase gem5::Tick recvAtomicBackdoor(gem5::PacketPtr pkt, gem5::MemBackdoorPtr &backdoor); void 
recvFunctional(gem5::PacketPtr packet); + void recvMemBackdoorReq(const gem5::MemBackdoorReq &req, + gem5::MemBackdoorPtr &backdoor); bool recvTimingReq(gem5::PacketPtr packet); bool tryTiming(gem5::PacketPtr packet); bool recvTimingSnoopResp(gem5::PacketPtr packet); diff --git a/src/systemc/tlm_bridge/tlm_to_gem5.cc b/src/systemc/tlm_bridge/tlm_to_gem5.cc index 703e118dee..468ea83f37 100644 --- a/src/systemc/tlm_bridge/tlm_to_gem5.cc +++ b/src/systemc/tlm_bridge/tlm_to_gem5.cc @@ -401,13 +401,26 @@ bool TlmToGem5Bridge::get_direct_mem_ptr(tlm::tlm_generic_payload &trans, tlm::tlm_dmi &dmi_data) { - auto [pkt, pkt_created] = payload2packet(_id, trans); - pkt->pushSenderState(new Gem5SystemC::TlmSenderState(trans)); - if (pkt_created) - pkt->req->setFlags(Request::NO_ACCESS); + MemBackdoor::Flags flags; + switch (trans.get_command()) { + case tlm::TLM_READ_COMMAND: + flags = MemBackdoor::Readable; + break; + case tlm::TLM_WRITE_COMMAND: + flags = MemBackdoor::Writeable; + break; + default: + panic("TlmToGem5Bridge: " + "received transaction with unsupported command"); + } + Addr start_addr = trans.get_address(); + Addr length = trans.get_data_length(); + MemBackdoorReq req({start_addr, start_addr + length}, flags); MemBackdoorPtr backdoor = nullptr; - bmp.sendAtomicBackdoor(pkt, backdoor); + + bmp.sendMemBackdoorReq(req, backdoor); + if (backdoor) { trans.set_dmi_allowed(true); dmi_data.set_dmi_ptr(backdoor->ptr()); @@ -434,17 +447,7 @@ TlmToGem5Bridge::get_direct_mem_ptr(tlm::tlm_generic_payload &trans, } } - gem5::Packet::SenderState *senderState = pkt->popSenderState(); - sc_assert( - nullptr != dynamic_cast(senderState)); - - // clean up - delete senderState; - - setPayloadResponse(trans, pkt); - - if (pkt_created) - destroyPacket(pkt); + trans.set_response_status(tlm::TLM_OK_RESPONSE); return backdoor != nullptr; } From 00c2f09bd966988c164a6e6f0b2667f2d4571064 Mon Sep 17 00:00:00 2001 From: "Bobby R. 
Bruce" Date: Mon, 21 Nov 2022 11:52:57 -0800 Subject: [PATCH 015/492] stdlib,configs: Update riscvmatched-fs example docstring The documentation string provided in "configs/example/gem5_library/riscvmatched-fs.py" was minimal. This patch adds more detail. Change-Id: I0f203ea6952fc72a078594d7c30853bd426017ff Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65851 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- configs/example/gem5_library/riscvmatched-fs.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/configs/example/gem5_library/riscvmatched-fs.py b/configs/example/gem5_library/riscvmatched-fs.py index da47a4be6c..1ed78e81a3 100644 --- a/configs/example/gem5_library/riscvmatched-fs.py +++ b/configs/example/gem5_library/riscvmatched-fs.py @@ -25,8 +25,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ -This gem5 configuration script runs a full system Ubuntu image, Linux -kernel and calls m5 exit after the kernel is loaded. +This gem5 configuration script runs the RISCVMatchedBoard in FS mode with a +an Ubuntu 20.04 image and calls m5 exit after the simulation has booted the OS. Usage --- @@ -34,8 +34,7 @@ Usage ``` scons build/RISCV/gem5.opt -./build/RISCV/gem5.opt \ - configs/example/gem5_library/riscvmatched-fs.py +./build/RISCV/gem5.opt configs/example/gem5_library/riscvmatched-fs.py ``` """ From 36f2964d1900f6cafb5596e3014625a38042aada Mon Sep 17 00:00:00 2001 From: "Bobby R.
Bruce" Date: Mon, 21 Nov 2022 11:56:23 -0800 Subject: [PATCH 016/492] configs,stdlib: Fix import in riscvmatched-fs.py Change-Id: I2ff4139457d32336f40c6655231064a12c4d8694 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65852 Tested-by: kokoro Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce --- configs/example/gem5_library/riscvmatched-fs.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/configs/example/gem5_library/riscvmatched-fs.py b/configs/example/gem5_library/riscvmatched-fs.py index 1ed78e81a3..8cf20d9da2 100644 --- a/configs/example/gem5_library/riscvmatched-fs.py +++ b/configs/example/gem5_library/riscvmatched-fs.py @@ -38,9 +38,7 @@ scons build/RISCV/gem5.opt ``` """ -from python.gem5.prebuilt.riscvmatched.riscvmatched_board import ( - RISCVMatchedBoard, -) +from gem5.prebuilt.riscvmatched.riscvmatched_board import RISCVMatchedBoard from gem5.utils.requires import requires from gem5.isas import ISA from gem5.simulate.simulator import Simulator From 5794643e445ca49eec55b567c061f6f5fc3cc2bf Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Mon, 21 Nov 2022 11:57:40 -0800 Subject: [PATCH 017/492] configs,stdlib,tests: Update riscvmatched-fs.py to-init The "test-gem5-library-example-riscvmatched-fs" test, which runs "configs/example/gem5_library/riscvmatched-fs.py", was running the script in full. This takes a very long time. Given we already have boot tests for RISCV, it's better to just run this configuration to just the end of the Linux boot (significantly faster than a full OS boot). This patch adds this feature to the config script and modifies the test to utilize it. 
Change-Id: I1e37a26aab5e9a127ebd64590be79fbc16fe53aa Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65853 Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce Tested-by: kokoro --- .../example/gem5_library/riscvmatched-fs.py | 26 ++++++++++++++++++- .../test_gem5_library_examples.py | 2 +- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/configs/example/gem5_library/riscvmatched-fs.py b/configs/example/gem5_library/riscvmatched-fs.py index 8cf20d9da2..3e84b8c1ea 100644 --- a/configs/example/gem5_library/riscvmatched-fs.py +++ b/configs/example/gem5_library/riscvmatched-fs.py @@ -44,8 +44,23 @@ from gem5.isas import ISA from gem5.simulate.simulator import Simulator from gem5.resources.workload import Workload +import argparse + requires(isa_required=ISA.RISCV) +parser = argparse.ArgumentParser( + description="A script which uses the RISCVMatchedBoard in FS mode." +) + +parser.add_argument( + "-i", + "--to-init", + action="store_true", + help="Exit the simulation after the Linux Kernel boot.", +) + +args = parser.parse_args() + # instantiate the riscv matched board with default parameters board = RISCVMatchedBoard( clk_freq="1.2GHz", @@ -57,7 +72,16 @@ board = RISCVMatchedBoard( # Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` # instruction which stops the simulation. When the simulation has ended you may # inspect `m5out/system.pc.com_1.device` to see the stdout. -board.set_workload(Workload("riscv-ubuntu-20.04-boot")) +# +# In the case where the `-i` flag is passed, we add the kernel argument +# `init=/root/exit.sh`. This means the simulation will exit after the Linux +# Kernel has booted. 
+workload = Workload("riscv-ubuntu-20.04-boot") +kernel_args = board.get_default_kernel_args() +if args.to_init: + kernel_args.append("init=/root/exit.sh") +workload.set_parameter("kernel_args", kernel_args) +board.set_workload(workload) simulator = Simulator(board=board) simulator.run() diff --git a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py index 28a10b588b..254b15cd4b 100644 --- a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py +++ b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py @@ -319,7 +319,7 @@ gem5_verify_config( "gem5_library", "riscvmatched-fs.py", ), - config_args=[], + config_args=["--to-init"], valid_isas=(constants.riscv_tag,), valid_hosts=constants.supported_hosts, length=constants.very_long_tag, From db35dfb9426b01a2900b9c248834dd3e554622a9 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Mon, 21 Nov 2022 12:05:29 -0800 Subject: [PATCH 018/492] tests: Update riscvmatched tests to use ALL/gem5.opt Where possible we are trying to use the ALL/gem5.opt compilation of gem5. This change updates the riscvmatched tests to this. 
Change-Id: I1c5f1d86cdf5cf29b8964f8a894a3476a7cb290a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65854 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- .../gem5_library_example_tests/test_gem5_library_examples.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py index 254b15cd4b..9b5c2c67ff 100644 --- a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py +++ b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py @@ -303,7 +303,7 @@ gem5_verify_config( "riscvmatched-hello.py", ), config_args=[], - valid_isas=(constants.riscv_tag,), + valid_isas=(constants.all_compiled_tag,), valid_hosts=constants.supported_hosts, length=constants.long_tag, ) @@ -320,7 +320,7 @@ gem5_verify_config( "riscvmatched-fs.py", ), config_args=["--to-init"], - valid_isas=(constants.riscv_tag,), + valid_isas=(constants.all_compiled_tag,), valid_hosts=constants.supported_hosts, length=constants.very_long_tag, ) From da12e9650729e4411c5dbfc612f8842988751483 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Mon, 21 Nov 2022 13:25:57 -0800 Subject: [PATCH 019/492] configs: Add missing `_pre_instantiate` call in "run_lupv.py" As of this change: https://gem5-review.googlesource.com/c/public/gem5/+/65051, the `_pre_instantiate` function must be called prior to `m5.instantiate` when using the stdlib without the Simulator module. 
Change-Id: Id5cec3b643d556b0f742719596abb53533b84cbd Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65871 Reviewed-by: Bobby Bruce Tested-by: kokoro Reviewed-by: Jason Lowe-Power Maintainer: Bobby Bruce --- configs/example/lupv/run_lupv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/configs/example/lupv/run_lupv.py b/configs/example/lupv/run_lupv.py index f472f53c69..0056cf8bb4 100644 --- a/configs/example/lupv/run_lupv.py +++ b/configs/example/lupv/run_lupv.py @@ -107,6 +107,7 @@ board.set_kernel_disk_workload( print("Running with ISA: " + processor.get_isa().name) print() root = Root(full_system=True, system=board) +board._pre_instantiate() m5.instantiate() print("Beginning simulation!") From 7230a3e7f0f23621d9d09df3f7420c08a08cc118 Mon Sep 17 00:00:00 2001 From: Quentin Forcioli Date: Thu, 18 Aug 2022 12:26:12 +0200 Subject: [PATCH 020/492] base,sim,ext: Adding GDB signals definition GDB uses a signal definition that is not necessarily identical to the kernel's. To avoid confusing GDB, we need to add this definition (in ext/gdbremote/signals.hh) and replace the Linux signals everywhere they were used to interact with GDB (otherwise it doesn't recognize some trap reasons).
Change-Id: I2bbfee36313cc766549000cf197c23c2561ea5f9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/63534 Reviewed-by: Jason Lowe-Power Maintainer: Andreas Sandberg Reviewed-by: Andreas Sandberg Maintainer: Jason Lowe-Power Reviewed-by: Quentin Forcioli Tested-by: kokoro --- ext/gdbremote/signals.hh | 181 +++++++++++++++++++++++++++++++++++++++ src/arch/power/faults.cc | 6 +- src/base/remote_gdb.cc | 35 ++++---- src/base/remote_gdb.hh | 21 ++--- src/sim/faults.cc | 4 +- src/sim/system.cc | 2 +- src/sim/system.hh | 2 +- src/sim/workload.cc | 2 +- src/sim/workload.hh | 3 +- 9 files changed, 220 insertions(+), 36 deletions(-) create mode 100644 ext/gdbremote/signals.hh diff --git a/ext/gdbremote/signals.hh b/ext/gdbremote/signals.hh new file mode 100644 index 0000000000..11835e6f5a --- /dev/null +++ b/ext/gdbremote/signals.hh @@ -0,0 +1,181 @@ +//===-- Generated From GDBRemoteSignals.cpp ------------------------===// +// +// Part of the LLVM Project, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------===// + +#include + +#ifndef __BASE_GDB_SIGNALS_HH__ +#define __BASE_GDB_SIGNALS_HH__ + +/* +These signals definitions are produced from LLVM's + lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp +*/ +namespace gem5{ + enum class GDBSignal : uint8_t + { + ZERO = 0, //Signal 0 + HUP = 1, //hangup + INT = 2, //interrupt + QUIT = 3, //quit + ILL = 4, //illegal instruction + TRAP = 5, //trace trap (not reset when caught) + ABRT = 6, //SIGIOT + EMT = 7, //emulation trap + FPE = 8, //floating point exception + KILL = 9, //kill + BUS = 10, //bus error + SEGV = 11, //segmentation violation + SYS = 12, //invalid system call + PIPE = 13, //write to pipe with reading end closed + ALRM = 14, //alarm + TERM = 15, //termination requested + URG = 16, //urgent data on socket + STOP = 17, //process stop + TSTP = 18, //tty stop + CONT = 19, //process continue + CHLD = 20, //SIGCLD + TTIN = 21, //background tty read + TTOU = 22, //background tty write + IO = 23, //input/output ready/Pollable event + XCPU = 24, //CPU resource exceeded + XFSZ = 25, //file size limit exceeded + VTALRM = 26, //virtual time alarm + PROF = 27, //profiling time alarm + WINCH = 28, //window size changes + LOST = 29, //resource lost + USR1 = 30, //user defined signal 1 + USR2 = 31, //user defined signal 2 + PWR = 32, //power failure + POLL = 33, //pollable event + WIND = 34, //SIGWIND + PHONE = 35, //SIGPHONE + WAITING = 36, //process's LWPs are blocked + LWP = 37, //signal LWP + DANGER = 38, //swap space dangerously low + GRANT = 39, //monitor mode granted + RETRACT = 40, //need to relinquish monitor mode + MSG = 41, //monitor mode data available + SOUND = 42, //sound completed + SAK = 43, //secure attention + PRIO = 44, //SIGPRIO + + SIG33 = 45, //real-time event 33 + SIG34 = 46, //real-time event 34 + SIG35 = 47, //real-time event 35 + SIG36 = 48, //real-time event 36 
+ SIG37 = 49, //real-time event 37 + SIG38 = 50, //real-time event 38 + SIG39 = 51, //real-time event 39 + SIG40 = 52, //real-time event 40 + SIG41 = 53, //real-time event 41 + SIG42 = 54, //real-time event 42 + SIG43 = 55, //real-time event 43 + SIG44 = 56, //real-time event 44 + SIG45 = 57, //real-time event 45 + SIG46 = 58, //real-time event 46 + SIG47 = 59, //real-time event 47 + SIG48 = 60, //real-time event 48 + SIG49 = 61, //real-time event 49 + SIG50 = 62, //real-time event 50 + SIG51 = 63, //real-time event 51 + SIG52 = 64, //real-time event 52 + SIG53 = 65, //real-time event 53 + SIG54 = 66, //real-time event 54 + SIG55 = 67, //real-time event 55 + SIG56 = 68, //real-time event 56 + SIG57 = 69, //real-time event 57 + SIG58 = 70, //real-time event 58 + SIG59 = 71, //real-time event 59 + SIG60 = 72, //real-time event 60 + SIG61 = 73, //real-time event 61 + SIG62 = 74, //real-time event 62 + SIG63 = 75, //real-time event 63 + + CANCEL = 76, //LWP internal signal + + SIG32 = 77, //real-time event 32 + SIG64 = 78, //real-time event 64 + SIG65 = 79, //real-time event 65 + SIG66 = 80, //real-time event 66 + SIG67 = 81, //real-time event 67 + SIG68 = 82, //real-time event 68 + SIG69 = 83, //real-time event 69 + SIG70 = 84, //real-time event 70 + SIG71 = 85, //real-time event 71 + SIG72 = 86, //real-time event 72 + SIG73 = 87, //real-time event 73 + SIG74 = 88, //real-time event 74 + SIG75 = 89, //real-time event 75 + SIG76 = 90, //real-time event 76 + SIG77 = 91, //real-time event 77 + SIG78 = 92, //real-time event 78 + SIG79 = 93, //real-time event 79 + SIG80 = 94, //real-time event 80 + SIG81 = 95, //real-time event 81 + SIG82 = 96, //real-time event 82 + SIG83 = 97, //real-time event 83 + SIG84 = 98, //real-time event 84 + SIG85 = 99, //real-time event 85 + SIG86 = 100, //real-time event 86 + SIG87 = 101, //real-time event 87 + SIG88 = 102, //real-time event 88 + SIG89 = 103, //real-time event 89 + SIG90 = 104, //real-time event 90 + SIG91 = 105, //real-time 
event 91 + SIG92 = 106, //real-time event 92 + SIG93 = 107, //real-time event 93 + SIG94 = 108, //real-time event 94 + SIG95 = 109, //real-time event 95 + SIG96 = 110, //real-time event 96 + SIG97 = 111, //real-time event 97 + SIG98 = 112, //real-time event 98 + SIG99 = 113, //real-time event 99 + SIG100 = 114, //real-time event 100 + SIG101 = 115, //real-time event 101 + SIG102 = 116, //real-time event 102 + SIG103 = 117, //real-time event 103 + SIG104 = 118, //real-time event 104 + SIG105 = 119, //real-time event 105 + SIG106 = 120, //real-time event 106 + SIG107 = 121, //real-time event 107 + SIG108 = 122, //real-time event 108 + SIG109 = 123, //real-time event 109 + SIG110 = 124, //real-time event 110 + SIG111 = 125, //real-time event 111 + SIG112 = 126, //real-time event 112 + SIG113 = 127, //real-time event 113 + SIG114 = 128, //real-time event 114 + SIG115 = 129, //real-time event 115 + SIG116 = 130, //real-time event 116 + SIG117 = 131, //real-time event 117 + SIG118 = 132, //real-time event 118 + SIG119 = 133, //real-time event 119 + SIG120 = 134, //real-time event 120 + SIG121 = 135, //real-time event 121 + SIG122 = 136, //real-time event 122 + SIG123 = 137, //real-time event 123 + SIG124 = 138, //real-time event 124 + SIG125 = 139, //real-time event 125 + SIG126 = 140, //real-time event 126 + SIG127 = 141, //real-time event 127 + + INFO = 142, //information request + unknown = 143, //unknown signal + + EXC_BAD_ACCESS = 145, //could not access memory + EXC_BAD_INSTRUCTION = 146, //illegal instruction/operand + EXC_ARITHMETIC = 147, //arithmetic exception + EXC_EMULATION = 148, //emulation instruction + EXC_SOFTWARE = 149, //software generated exception + EXC_BREAKPOINT = 150, //breakpoint + + LIBRT = 151, //librt internal signal + }; +} +#endif /* __BASE_GDB_SIGNALS_HH__ */ diff --git a/src/arch/power/faults.cc b/src/arch/power/faults.cc index be1796e14a..0d8f2ddd68 100644 --- a/src/arch/power/faults.cc +++ b/src/arch/power/faults.cc @@ -42,7 +42,7 @@ 
namespace PowerISA void UnimplementedOpcodeFault::invoke(ThreadContext *tc, const StaticInstPtr &inst) { - panic_if(tc->getSystemPtr()->trapToGdb(SIGILL, tc->contextId()), + panic_if(tc->getSystemPtr()->trapToGdb(GDBSignal::ILL, tc->contextId()), "Unimplemented opcode encountered at virtual address %#x\n", tc->pcState().instAddr()); } @@ -50,14 +50,14 @@ UnimplementedOpcodeFault::invoke(ThreadContext *tc, const StaticInstPtr &inst) void AlignmentFault::invoke(ThreadContext *tc, const StaticInstPtr &inst) { - panic_if(!tc->getSystemPtr()->trapToGdb(SIGBUS, tc->contextId()), + panic_if(!tc->getSystemPtr()->trapToGdb(GDBSignal::BUS, tc->contextId()), "Alignment fault when accessing virtual address %#x\n", vaddr); } void TrapFault::invoke(ThreadContext *tc, const StaticInstPtr &inst) { - panic_if(tc->getSystemPtr()->trapToGdb(SIGTRAP, tc->contextId()), + panic_if(tc->getSystemPtr()->trapToGdb(GDBSignal::TRAP, tc->contextId()), "Trap encountered at virtual address %#x\n", tc->pcState().instAddr()); } diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc index c19dede748..47fae75cbb 100644 --- a/src/base/remote_gdb.cc +++ b/src/base/remote_gdb.cc @@ -130,7 +130,6 @@ #include "base/remote_gdb.hh" #include -#include #include #include @@ -192,7 +191,7 @@ class HardBreakpoint : public PCEvent DPRINTF(GDBMisc, "handling hardware breakpoint at %#x\n", pc()); if (tc == gdb->tc) - gdb->trap(tc->contextId(), SIGTRAP,""); + gdb->trap(tc->contextId(), GDBSignal::TRAP,""); } }; @@ -549,7 +548,7 @@ BaseRemoteGDB::selectThreadContext(ContextID id) // makes sense to use POSIX errno values, because that is what the // gdb/remote.c functions want to return. 
void -BaseRemoteGDB::trap(ContextID id, int signum,const std::string& stopReason) +BaseRemoteGDB::trap(ContextID id, GDBSignal sig,const std::string& stopReason) { if (!attached) return; @@ -575,10 +574,10 @@ BaseRemoteGDB::trap(ContextID id, int signum,const std::string& stopReason) send("OK"); } else { // Tell remote host that an exception has occurred. - sendTPacket(signum,id,stopReason); + sendTPacket(sig,id,stopReason); } - processCommands(signum); + processCommands(sig); } bool @@ -613,7 +612,7 @@ BaseRemoteGDB::incomingData(int revent) } if (revent & POLLIN) { - scheduleTrapEvent(tc->contextId(),SIGILL,0,""); + scheduleTrapEvent(tc->contextId(),GDBSignal::ILL,0,""); } else if (revent & POLLNVAL) { descheduleInstCommitEvent(&trapEvent); scheduleInstCommitEvent(&disconnectEvent, 0); @@ -766,14 +765,14 @@ BaseRemoteGDB::send(const char *bp) } void -BaseRemoteGDB::processCommands(int signum) +BaseRemoteGDB::processCommands(GDBSignal sig) { // Stick frame regs into our reg cache. regCachePtr = gdbRegs(); regCachePtr->getRegs(tc); GdbCommand::Context cmd_ctx; - cmd_ctx.type = signum; + cmd_ctx.type = sig; std::vector data; for (;;) { @@ -882,7 +881,7 @@ BaseRemoteGDB::singleStep() { if (!singleStepEvent.scheduled()) scheduleInstCommitEvent(&singleStepEvent, 1); - trap(tc->contextId(), SIGTRAP); + trap(tc->contextId(), GDBSignal::TRAP); } void @@ -951,18 +950,20 @@ BaseRemoteGDB::removeHardBreak(Addr addr, size_t kind) } void -BaseRemoteGDB::sendTPacket(int errnum, ContextID id, +BaseRemoteGDB::sendTPacket(GDBSignal sig, ContextID id, const std::string& stopReason) { if (!stopReason.empty()){ - send("T%02xcore:%x;thread:%x;%s;",errnum,id + 1,id + 1,stopReason); + send("T%02xcore:%x;thread:%x;%s;", + (uint8_t)sig,id + 1,id + 1,stopReason); }else{ - send("T%02xcore:%x;thread:%x;",errnum,id + 1,id + 1); + send("T%02xcore:%x;thread:%x;", + (uint8_t)sig,id + 1,id + 1); } } void -BaseRemoteGDB::sendSPacket(int errnum){ - send("S%02x",errnum); 
+BaseRemoteGDB::sendSPacket(GDBSignal sig){ + send("S%02x",(uint8_t)sig); } void BaseRemoteGDB::sendOPacket(const std::string message){ @@ -970,12 +971,12 @@ BaseRemoteGDB::sendOPacket(const std::string message){ } void -BaseRemoteGDB::scheduleTrapEvent(ContextID id,int type,int delta, +BaseRemoteGDB::scheduleTrapEvent(ContextID id,GDBSignal sig,int delta, std::string stopReason){ ThreadContext* _tc = threads[id]; panic_if(_tc == nullptr, "Unknown context id :%i",id); trapEvent.id(id); - trapEvent.type(type); + trapEvent.type(sig); trapEvent.stopReason(stopReason); if (!trapEvent.scheduled()) scheduleInstCommitEvent(&trapEvent,delta,_tc); @@ -1171,7 +1172,7 @@ BaseRemoteGDB::cmdSetThread(GdbCommand::Context &ctx) throw CmdError("E04"); // Line up on an instruction boundary in the new thread. threadSwitching = true; - scheduleTrapEvent(tid,0,0,""); + scheduleTrapEvent(tid,GDBSignal::ZERO,0,""); return false; } } else { diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh index 4da1dcc0c8..c23b4ac95e 100644 --- a/src/base/remote_gdb.hh +++ b/src/base/remote_gdb.hh @@ -42,7 +42,6 @@ #ifndef __REMOTE_GDB_HH__ #define __REMOTE_GDB_HH__ -#include #include #include @@ -56,6 +55,7 @@ #include "base/socket.hh" #include "base/types.hh" #include "cpu/pc_event.hh" +#include "gdbremote/signals.hh" #include "sim/debug.hh" #include "sim/eventq.hh" @@ -171,10 +171,10 @@ class BaseRemoteGDB void replaceThreadContext(ThreadContext *_tc); bool selectThreadContext(ContextID id); - void trap(ContextID id, int signum,const std::string& stopReason=""); + void trap(ContextID id, GDBSignal sig,const std::string& stopReason=""); bool sendMessage(std::string message); //schedule a trap event with these properties - void scheduleTrapEvent(ContextID id,int type, int delta, + void scheduleTrapEvent(ContextID id,GDBSignal type, int delta, std::string stopReason); /** @} */ // end of api_remote_gdb @@ -259,7 +259,7 @@ class BaseRemoteGDB * or SW trap), 'signum' is the signal value 
reported back to GDB * in "S" packet (this is done in trap()). */ - void processCommands(int signum=0); + void processCommands(GDBSignal sig=GDBSignal::ZERO); /* * Simulator side debugger state. @@ -280,7 +280,7 @@ class BaseRemoteGDB class TrapEvent : public Event { protected: - int _type; + GDBSignal _type; ContextID _id; std::string _stopReason; BaseRemoteGDB *gdb; @@ -289,7 +289,7 @@ class BaseRemoteGDB TrapEvent(BaseRemoteGDB *g) : gdb(g) {} - void type(int t) { _type = t; } + void type(GDBSignal t) { _type = t; } void stopReason(std::string s) {_stopReason = s; } void id(ContextID id) { _id = id; } void process() { gdb->trap(_id, _type,_stopReason); } @@ -327,8 +327,9 @@ class BaseRemoteGDB void insertHardBreak(Addr addr, size_t kind); void removeHardBreak(Addr addr, size_t kind); - void sendTPacket(int errnum, ContextID id,const std::string& stopReason); - void sendSPacket(int errnum); + void sendTPacket(GDBSignal sig, ContextID id, + const std::string& stopReason); + void sendSPacket(GDBSignal sig); //The OPacket allow to send string to be displayed by the remote GDB void sendOPacket(const std::string message); /* @@ -341,7 +342,7 @@ class BaseRemoteGDB { const GdbCommand *cmd; char cmdByte; - int type; + GDBSignal type; char *data; int len; }; @@ -363,7 +364,7 @@ class BaseRemoteGDB { const GdbMultiLetterCommand *cmd; std::string cmdTxt; - int type; + GDBSignal type; char *data; int len; }; diff --git a/src/sim/faults.cc b/src/sim/faults.cc index c0a7d76eaa..3049b3be42 100644 --- a/src/sim/faults.cc +++ b/src/sim/faults.cc @@ -100,14 +100,14 @@ GenericPageTableFault::invoke(ThreadContext *tc, const StaticInstPtr &inst) handled = p->fixupFault(vaddr); } panic_if(!handled && - !tc->getSystemPtr()->trapToGdb(SIGSEGV, tc->contextId()), + !tc->getSystemPtr()->trapToGdb(GDBSignal::SEGV, tc->contextId()), "Page table fault when accessing virtual address %#x\n", vaddr); } void GenericAlignmentFault::invoke(ThreadContext *tc, const StaticInstPtr &inst) { - 
panic_if(!tc->getSystemPtr()->trapToGdb(SIGSEGV, tc->contextId()), + panic_if(!tc->getSystemPtr()->trapToGdb(GDBSignal::SEGV, tc->contextId()), "Alignment fault when accessing virtual address %#x\n", vaddr); } diff --git a/src/sim/system.cc b/src/sim/system.cc index 5f67c4d2b8..ee6c70a5d3 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -391,7 +391,7 @@ System::workItemEnd(uint32_t tid, uint32_t workid) } bool -System::trapToGdb(int signal, ContextID ctx_id) const +System::trapToGdb(GDBSignal signal, ContextID ctx_id) const { return workload->trapToGdb(signal, ctx_id); } diff --git a/src/sim/system.hh b/src/sim/system.hh index 7738d561c3..d691fb8bf8 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -554,7 +554,7 @@ class System : public SimObject, public PCEventScope void workItemEnd(uint32_t tid, uint32_t workid); /* Returns whether we successfully trapped into GDB. */ - bool trapToGdb(int signal, ContextID ctx_id) const; + bool trapToGdb(GDBSignal signal, ContextID ctx_id) const; protected: /** diff --git a/src/sim/workload.cc b/src/sim/workload.cc index ca51bbdb73..ceb1029f77 100644 --- a/src/sim/workload.cc +++ b/src/sim/workload.cc @@ -72,7 +72,7 @@ Workload::replaceThreadContext(ThreadContext *tc) } bool -Workload::trapToGdb(int signal, ContextID ctx_id) +Workload::trapToGdb(GDBSignal signal, ContextID ctx_id) { if (gdb && gdb->isAttached()) { gdb->trap(ctx_id, signal); diff --git a/src/sim/workload.hh b/src/sim/workload.hh index f9bb8dba3c..10129379e0 100644 --- a/src/sim/workload.hh +++ b/src/sim/workload.hh @@ -34,6 +34,7 @@ #include "base/loader/object_file.hh" #include "base/loader/symtab.hh" #include "enums/ByteOrder.hh" +#include "gdbremote/signals.hh" #include "params/StubWorkload.hh" #include "params/Workload.hh" #include "sim/sim_object.hh" @@ -91,7 +92,7 @@ class Workload : public SimObject // Once trapping into GDB is no longer a special case routed through the // system object, this helper can be removed. 
- bool trapToGdb(int signal, ContextID ctx_id); + bool trapToGdb(GDBSignal sig, ContextID ctx_id); bool sendToGdb(std::string msg); virtual void registerThreadContext(ThreadContext *tc); From d401b1fbadff4a69059e6a2132ad500f34cf2c65 Mon Sep 17 00:00:00 2001 From: Quentin Forcioli Date: Tue, 16 Aug 2022 17:43:31 +0200 Subject: [PATCH 021/492] base,sim: Adding monitor function to GDB The remote protocol provides a monitor query. This query allows the stub to provide implementation-defined behavior. I propose to use this command as a way to quit simulation with a message provided by the GDB client. Thus calling "monitor my_message" in the client will exit the simulation with the exit message "GDB_MONITOR:my_message". This is implemented through a derived class based on GlobalSimLoopExitEvent and a small addition to the base class that adds a clean method that will be called when returning to simulation after the event. Change-Id: Ib5fda569edcf6733cbcc6240ef6d2ec4dc6502ec Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/63538 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- src/base/remote_gdb.cc | 46 +++++++++++++++++++++++++++++++++++++----- src/base/remote_gdb.hh | 1 + src/sim/eventq.hh | 1 + src/sim/sim_events.hh | 7 +++++-- src/sim/simulate.cc | 5 ++++- 5 files changed, 52 insertions(+), 8 deletions(-) diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc index 47fae75cbb..43f53d1247 100644 --- a/src/base/remote_gdb.cc +++ b/src/base/remote_gdb.cc @@ -157,6 +157,7 @@ #include "mem/translating_port_proxy.hh" #include "sim/full_system.hh" #include "sim/process.hh" +#include "sim/sim_events.hh" #include "sim/system.hh" namespace gem5 @@ -241,7 +242,7 @@ hex2c(char c0,char c1) //this function will be used in a future patch //convert a encoded string to a string -[[maybe_unused]] std::string +std::string hexS2string(std::string hex_in) { std::string out=""; @@ -554,7 +555,6 @@ BaseRemoteGDB::trap(ContextID id, GDBSignal
sig,const std::string& stopReason) return; if (tc->contextId() != id) { - //prevent thread switch when single stepping if (singleStepEvent.scheduled()){ return; @@ -564,11 +564,14 @@ BaseRemoteGDB::trap(ContextID id, GDBSignal sig,const std::string& stopReason) return; } + DPRINTF(GDBMisc, "trap: PC=%s\n", tc->pcState()); clearSingleStep(); - - if (threadSwitching) { + if (stopReason=="monitor_return"){ + //should wnot send any Tpacket here + send("OK"); + }else if (threadSwitching) { threadSwitching = false; // Tell GDB the thread switch has completed. send("OK"); @@ -1326,6 +1329,7 @@ splitAt(std::string str, const char * const delim) std::map BaseRemoteGDB::queryMap = { { "C", { &BaseRemoteGDB::queryC } }, + { "Rcmd", { &BaseRemoteGDB::queryRcmd} }, { "Attached", { &BaseRemoteGDB::queryAttached} }, { "Supported", { &BaseRemoteGDB::querySupported, ";" } }, { "Xfer", { &BaseRemoteGDB::queryXfer } }, @@ -1416,6 +1420,38 @@ BaseRemoteGDB::queryAttached(QuerySetCommand::Context &ctx) return true; } +class MonitorCallEvent : public GlobalSimLoopExitEvent +{ + BaseRemoteGDB& gdb; + ContextID id; + public: + MonitorCallEvent(BaseRemoteGDB& gdb,ContextID id,const std::string &_cause, + int code): + GlobalSimLoopExitEvent(_cause,code), gdb(gdb),id(id) + {}; + void process() override{ + GlobalSimLoopExitEvent::process(); + } + void clean() override{ + //trapping now + //this is the only point in time when we can call trap + //before any breakpoint triggers + gdb.trap(id,GDBSignal::ZERO,"monitor_return"); + delete this; + } + ~MonitorCallEvent(){ + DPRINTF(Event,"MonitorCallEvent destructed\n");; + } +}; + +bool +BaseRemoteGDB::queryRcmd(QuerySetCommand::Context &ctx){ + std::string message=hexS2string(ctx.args[0]); + DPRINTF(GDBMisc, "Rcmd Query: %s => %s\n", ctx.args[0],message); + //Tick when = curTick(); + new MonitorCallEvent(*this,tc->contextId(),"GDB_MONITOR:"+ message, 0); + return false; +} bool BaseRemoteGDB::queryFThreadInfo(QuerySetCommand::Context &ctx) @@ 
-1444,7 +1480,7 @@ BaseRemoteGDB::cmdQueryVar(GdbCommand::Context &ctx) { // The query command goes until the first ':', or the end of the string. std::string s(ctx.data, ctx.len); - auto query_split = splitAt({ ctx.data, (size_t)ctx.len }, ":"); + auto query_split = splitAt({ ctx.data, (size_t)ctx.len }, ":,"); const auto &query_str = query_split.first; // Look up the query command, and report if it isn't found. diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh index c23b4ac95e..1c5cd9c7af 100644 --- a/src/base/remote_gdb.hh +++ b/src/base/remote_gdb.hh @@ -433,6 +433,7 @@ class BaseRemoteGDB bool querySupported(QuerySetCommand::Context &ctx); bool queryXfer(QuerySetCommand::Context &ctx); bool querySymbol(QuerySetCommand::Context &ctx); + bool queryRcmd(QuerySetCommand::Context &ctx); bool queryAttached(QuerySetCommand::Context &ctx); size_t threadInfoIdx = 0; diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh index a7226ffead..cd5d285f93 100644 --- a/src/sim/eventq.hh +++ b/src/sim/eventq.hh @@ -46,6 +46,7 @@ #include "base/debug.hh" #include "base/flags.hh" +#include "base/trace.hh" #include "base/types.hh" #include "base/uncontended_mutex.hh" #include "debug/Event.hh" diff --git a/src/sim/sim_events.hh b/src/sim/sim_events.hh index 06a8e6548d..a1ffc7b34a 100644 --- a/src/sim/sim_events.hh +++ b/src/sim/sim_events.hh @@ -68,8 +68,11 @@ class GlobalSimLoopExitEvent : public GlobalEvent const std::string getCause() const { return cause; } int getCode() const { return code; } - void process(); // process event - + virtual void process();// process event + virtual void clean(){};//cleaning event + ~GlobalSimLoopExitEvent (){ + DPRINTF(Event,"GlobalSimLoopExitEvent destructed\n"); + }; virtual const char *description() const; }; diff --git a/src/sim/simulate.cc b/src/sim/simulate.cc index c5d07942ef..0c30f10570 100644 --- a/src/sim/simulate.cc +++ b/src/sim/simulate.cc @@ -184,9 +184,12 @@ struct DescheduleDeleter * terminate the loop. Exported to Python. 
* @return The SimLoopExitEvent that caused the loop to exit. */ +GlobalSimLoopExitEvent *global_exit_event= nullptr; GlobalSimLoopExitEvent * simulate(Tick num_cycles) { + if (global_exit_event)//cleaning last global exit event + global_exit_event->clean(); std::unique_ptr quantum_event; const Tick exit_tick = num_cycles < MaxTick - curTick() ? curTick() + num_cycles : MaxTick; @@ -224,7 +227,7 @@ simulate(Tick num_cycles) BaseGlobalEvent *global_event = local_event->globalEvent(); assert(global_event); - GlobalSimLoopExitEvent *global_exit_event = + global_exit_event = dynamic_cast(global_event); assert(global_exit_event); From 4054565b853cb8b22ce30b9e0bbed944060d313b Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Tue, 22 Nov 2022 14:30:35 -0800 Subject: [PATCH 022/492] tests: Delete build directory before running KVM in nightly The nightly tests failed here: https://jenkins.gem5.org/job/nightly/430/. What seems to have happened is the ALL/gem5.opt is compiled within the Docker container but then, for the KVM tests, there is an attempt to recompile on the host, which causes compilation problems. The safest strategy here is to delete the build directory prior to running the KVM tests. In the latest versions of our test infrastructure, the KVM tests should be run completely separately (i.e., in different Jenkins jobs) to avoid this. Change-Id: Id7d18c0504dd324f7a0e5e9a7809463520969dda Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65911 Maintainer: Bobby Bruce Tested-by: kokoro Reviewed-by: Bobby Bruce --- tests/nightly.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/nightly.sh b/tests/nightly.sh index a082158e01..ec15f91a78 100755 --- a/tests/nightly.sh +++ b/tests/nightly.sh @@ -89,6 +89,10 @@ docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \ # removes all those part of the 'very-long' (weekly) tests, or for compilation # to '.debug' or '.fast'. We also remove ARM targets as our Jenkins is an X86 # system.
Users wishing to run this script elsewhere should be aware of this. +# Note: we delete the build directory here. It was build in the +# "ubuntu-22.04_all-dependencies" docker image which may not be compatible with +# the host environment. +rm -rf "${gem5_root}/build" cd "${gem5_root}/tests" ./main.py run -j${compile_threads} -vv \ --exclude-tags ".*" --include-tags kvm --exclude-tags very\-long \ From 92027a68ceea09624b4ce17da81d05ede883c484 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 23 Nov 2022 14:20:14 -0800 Subject: [PATCH 023/492] configs: Set CPU vendor to M5 Simulator in apu_se.py Other vendor strings cause, for some reason, bad addresses to be computed when running the GPU model. This change reverts back to M5 Simulator only for apu_se.py. Change-Id: I5992b4e31569f5c0e5e49e523908c8fa0602f845 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65991 Tested-by: kokoro Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Reviewed-by: Jason Lowe-Power --- configs/example/apu_se.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index 39def024fc..8e8bc60fe8 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -757,6 +757,11 @@ if fast_forward: (cpu_list[i], future_cpu_list[i]) for i in range(args.num_cpus) ] +# Other CPU strings cause bad addresses in ROCm. Revert back to M5 Simulator. +for (i, cpu) in enumerate(cpu_list): + for j in range(len(cpu)): + cpu.isa[j].vendor_string = "M5 Simulator" + # Full list of processing cores in the system. cpu_list = cpu_list + [shader] + cp_list From 770b84c2ee098efdc7a0fa768334d45ab9720de6 Mon Sep 17 00:00:00 2001 From: "Bobby R.
Bruce" Date: Mon, 28 Nov 2022 13:02:49 -0800 Subject: [PATCH 024/492] sim: Add missing virtual destructor to GlobalSyncEvent This missing destructor in GlobalSyncEvent was causing a compilation error in gcc-12, thus causing the compiler-tests to fail: https://jenkins.gem5.org/job/compiler-checks/436/ In addition, a destructor was added to BaseGlobalEventTemplate. This does not directly fix the aforementioned bug but provides some additional security. Change-Id: Iab86d3f6d55064ba3b6a8a7cb01fb14533cce4b9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66152 Maintainer: Jason Lowe-Power Tested-by: kokoro Maintainer: Bobby Bruce Reviewed-by: Jason Lowe-Power --- src/sim/global_event.hh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sim/global_event.hh b/src/sim/global_event.hh index 05ae6ca9ad..36332043c6 100644 --- a/src/sim/global_event.hh +++ b/src/sim/global_event.hh @@ -162,6 +162,8 @@ class BaseGlobalEventTemplate : public BaseGlobalEvent for (int i = 0; i < numMainEventQueues; ++i) barrierEvent[i] = new typename Derived::BarrierEvent(this, p, f); } + + virtual ~BaseGlobalEventTemplate(){} }; @@ -229,6 +231,8 @@ class GlobalSyncEvent : public BaseGlobalEventTemplate schedule(when); } + virtual ~GlobalSyncEvent (){} + void process(); const char *description() const; From 8391f47bc9293ed4b13740c747ebc1894cf15f2c Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Sat, 26 Nov 2022 00:48:18 +0000 Subject: [PATCH 025/492] stdlib: More helpful message for the filelock error Change-Id: Ib8e3bc9fc145a9604670e8288209ac62bfbd7932 Signed-off-by: Hoa Nguyen Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66091 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- src/python/gem5/utils/filelock.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/python/gem5/utils/filelock.py b/src/python/gem5/utils/filelock.py index 82e1122bf9..a6798e9f53 100644 --- a/src/python/gem5/utils/filelock.py
+++ b/src/python/gem5/utils/filelock.py @@ -69,12 +69,22 @@ class FileLock(object): except OSError as e: if e.errno != errno.EEXIST: raise + solution_message = ( + "This is likely due to the existence" + " of the lock file '{}'. If there's no other process" + " the lock file, you can manually delete the lock file and" + " rerun the script.".format(self.lockfile) + ) if self.timeout is None: raise FileLockException( - "Could not acquire lock on {}".format(self.file_name) + "Could not acquire lock on {}. {}".format( + self.file_name, solution_message + ) ) if (time.time() - start_time) >= self.timeout: - raise FileLockException("Timeout occured.") + raise FileLockException( + "Timeout occured. {}".format(solution_message) + ) time.sleep(self.delay) # self.is_locked = True From c8949f085fdfcc590be501b85fe3c0cc9bdb80dc Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Mon, 14 Nov 2022 03:36:52 +0000 Subject: [PATCH 026/492] stdlib: Change #virtual_networks of mesi_two_level to 3 A grep of "virtual_network" in src/mem/ruby/protocol shows that files in MESI_Two_Level.slicc utilize at most 3 virtual networks.
Change-Id: I3e8dd09dd82b9c802fdf91145c6d998bc6db541b Signed-off-by: Hoa Nguyen Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65592 Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Jason Lowe-Power --- .../cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py index 82089a5bdc..79c8b0ada3 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py @@ -83,11 +83,11 @@ class MESITwoLevelCacheHierarchy( self.ruby_system = RubySystem() - # MESI_Two_Level needs 5 virtual networks - self.ruby_system.number_of_virtual_networks = 5 + # MESI_Two_Level needs 3 virtual networks + self.ruby_system.number_of_virtual_networks = 3 self.ruby_system.network = SimplePt2Pt(self.ruby_system) - self.ruby_system.network.number_of_virtual_networks = 5 + self.ruby_system.network.number_of_virtual_networks = 3 self._l1_controllers = [] for i, core in enumerate(board.get_processor().get_cores()): From f99947059d4bd22cf066f5261b10be4e8e333fc5 Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Sat, 12 Nov 2022 08:00:49 +0000 Subject: [PATCH 027/492] stdlib: Clean up Ruby cache directory - Fix typos. - Fix type inconsistencies. 
Change-Id: I98d82ec7e62130abb09295c5ec6cde86b1f7fa27 Signed-off-by: Hoa Nguyen Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65571 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- .../cachehierarchies/abstract_two_level_cache_hierarchy.py | 6 ++---- .../cachehierarchies/ruby/caches/mesi_two_level/l2_cache.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/python/gem5/components/cachehierarchies/abstract_two_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/abstract_two_level_cache_hierarchy.py index 17cae4aba8..d6a035f2cb 100644 --- a/src/python/gem5/components/cachehierarchies/abstract_two_level_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/abstract_two_level_cache_hierarchy.py @@ -24,8 +24,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# from .abstract_cache_hierarchy import AbstractCacheHierarchy - class AbstractTwoLevelCacheHierarchy: """ @@ -51,9 +49,9 @@ class AbstractTwoLevelCacheHierarchy: :type l1i_assoc: int - :param l1dsize: The size of the LL1 Data cache (e.g. "32kB"). + :param l1d_size: The size of the L1 Data cache (e.g. "32kB"). 
- :type l1dsize: str + :type l1d_size: str :param l1d_assoc: diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l2_cache.py index 56cb5b2ec1..81ef4dbe90 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l2_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l2_cache.py @@ -45,7 +45,7 @@ class L2Cache(AbstractL2Cache): start_index_bit=self.getIndexBit(num_l2Caches), ) - self.transitions_per_cycle = "4" + self.transitions_per_cycle = 4 def getIndexBit(self, num_l2caches): l2_bits = int(math.log(num_l2caches, 2)) From eac06ad681ad48a08f946dda60b42adadfada3bf Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Mon, 28 Nov 2022 03:26:32 +0000 Subject: [PATCH 028/492] python: Fix multiline quotes in a single line An example case, ```python mem_side_port = RequestPort( "This port sends requests and " "receives responses" ) ``` This is the residue of running the python formatter. This is done by finding all tokens matching the regex `"\s"(?![.;"])` and manually replacing them by empty strings. 
Change-Id: Icf223bbe889e5fa5749a81ef77aa6e721f38b549 Signed-off-by: Hoa Nguyen Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66111 Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce Tested-by: kokoro --- build_tools/cxx_config_cc.py | 4 +- build_tools/enum_cc.py | 2 +- configs/common/Options.py | 4 +- configs/example/apu_se.py | 18 ++- configs/example/arm/baremetal.py | 8 +- configs/example/arm/dist_bigLITTLE.py | 6 +- configs/example/gpufs/amd/AmdGPUOptions.py | 16 +-- configs/example/gpufs/runfs.py | 4 +- src/arch/arm/ArmFsWorkload.py | 4 +- src/arch/arm/ArmPMU.py | 2 +- .../fastmodel/CortexA76/FastModelCortexA76.py | 2 +- .../fastmodel/CortexR52/FastModelCortexR52.py | 42 +++---- src/arch/arm/fastmodel/GIC/FastModelGIC.py | 114 +++++++++--------- .../fastmodel/PL330_DMAC/FastModelPL330.py | 2 +- src/arch/isa_parser/isa_parser.py | 4 +- src/cpu/BaseCPU.py | 2 +- src/cpu/minor/BaseMinorCPU.py | 14 +-- src/cpu/o3/BaseO3CPU.py | 32 ++--- src/cpu/o3/probe/ElasticTrace.py | 6 +- src/cpu/testers/memtest/MemTest.py | 4 +- src/cpu/testers/rubytest/RubyTester.py | 2 +- src/cpu/testers/traffic_gen/BaseTrafficGen.py | 2 +- src/cpu/testers/traffic_gen/GUPSGen.py | 4 +- src/cpu/trace/TraceCPU.py | 6 +- src/dev/amdgpu/AMDGPU.py | 2 +- src/dev/arm/RealView.py | 12 +- src/dev/arm/SMMUv3.py | 4 +- src/dev/net/Ethernet.py | 2 +- src/dev/pci/PciHost.py | 2 +- src/dev/serial/Uart.py | 2 +- src/gpu-compute/GPU.py | 42 +++---- src/gpu-compute/LdsState.py | 2 +- src/learning_gem5/part2/HelloObject.py | 4 +- src/mem/AddrMapper.py | 4 +- src/mem/Bridge.py | 4 +- src/mem/CommMonitor.py | 16 +-- src/mem/DRAMInterface.py | 4 +- src/mem/DRAMsim3.py | 4 +- src/mem/ExternalSlave.py | 2 +- src/mem/MemChecker.py | 8 +- src/mem/MemCtrl.py | 6 +- src/mem/MemDelay.py | 4 +- src/mem/MemInterface.py | 6 +- src/mem/PortTerminator.py | 4 +- src/mem/SerialLink.py | 6 +- src/mem/XBar.py | 10 +- src/mem/cache/Cache.py | 6 +- src/mem/cache/compressors/Compressors.py | 2 +- 
src/mem/cache/tags/Tags.py | 2 +- src/mem/probes/StackDistProbe.py | 2 +- src/mem/ruby/slicc_interface/Controller.py | 2 +- src/mem/ruby/system/GPUCoalescer.py | 2 +- src/mem/slicc/symbols/StateMachine.py | 2 +- .../gem5/components/memory/dramsim_3.py | 2 +- src/python/m5/SimObject.py | 4 +- src/python/m5/params.py | 2 +- src/sim/System.py | 4 +- src/systemc/tests/verify.py | 4 +- tests/configs/gpu-randomtest-ruby.py | 4 +- tests/configs/gpu-ruby.py | 12 +- tests/gem5/fixture.py | 4 +- util/cpt_upgraders/isa-is-simobject.py | 2 +- util/gerrit-bot/extract_gitcookies.py | 2 +- util/maint/list_changes.py | 6 +- util/maint/show_changes_by_file.py | 4 +- util/plot_dram/dram_sweep_plot.py | 2 +- 66 files changed, 256 insertions(+), 268 deletions(-) diff --git a/build_tools/cxx_config_cc.py b/build_tools/cxx_config_cc.py index a908aa8c17..33d3bba864 100644 --- a/build_tools/cxx_config_cc.py +++ b/build_tools/cxx_config_cc.py @@ -255,9 +255,7 @@ for param in sim_object._params.values(): code('} else if (name == "${{param.name}}") {') code.indent() code("${{param.name}}.clear();") - code( - "for (auto i = values.begin(); " "ret && i != values.end(); i ++)" - ) + code("for (auto i = values.begin(); ret && i != values.end(); i ++)") code("{") code.indent() code("${{param.ptype.cxx_type}} elem;") diff --git a/build_tools/enum_cc.py b/build_tools/enum_cc.py index 476e49d750..cd192c56fb 100644 --- a/build_tools/enum_cc.py +++ b/build_tools/enum_cc.py @@ -87,7 +87,7 @@ namespace gem5 ) if enum.wrapper_is_struct: - code("const char *${wrapper_name}::${name}Strings" "[Num_${name}] =") + code("const char *${wrapper_name}::${name}Strings[Num_${name}] =") else: if enum.is_class: code( diff --git a/configs/common/Options.py b/configs/common/Options.py index 81d7791285..5585a75b80 100644 --- a/configs/common/Options.py +++ b/configs/common/Options.py @@ -217,7 +217,7 @@ def addNoISAOptions(parser): "--maxtime", type=float, default=None, - help="Run to the specified absolute simulated 
time in " "seconds", + help="Run to the specified absolute simulated time in seconds", ) parser.add_argument( "-P", @@ -691,7 +691,7 @@ def addSEOptions(parser): "-o", "--options", default="", - help="""The options to pass to the binary, use " " + help="""The options to pass to the binary, use around the entire string""", ) parser.add_argument( diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index 8e8bc60fe8..c2b97fd82e 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -85,7 +85,7 @@ parser.add_argument( "--cu-per-sqc", type=int, default=4, - help="number of CUs" "sharing an SQC (icache, and thus icache TLB)", + help="number of CUssharing an SQC (icache, and thus icache TLB)", ) parser.add_argument( "--cu-per-scalar-cache", @@ -94,7 +94,7 @@ parser.add_argument( help="Number of CUs sharing a scalar cache", ) parser.add_argument( - "--simds-per-cu", type=int, default=4, help="SIMD units" "per CU" + "--simds-per-cu", type=int, default=4, help="SIMD unitsper CU" ) parser.add_argument( "--cu-per-sa", @@ -140,13 +140,13 @@ parser.add_argument( "--glbmem-wr-bus-width", type=int, default=32, - help="VGPR to Coalescer (Global Memory) data bus width " "in bytes", + help="VGPR to Coalescer (Global Memory) data bus width in bytes", ) parser.add_argument( "--glbmem-rd-bus-width", type=int, default=32, - help="Coalescer to VGPR (Global Memory) data bus width in " "bytes", + help="Coalescer to VGPR (Global Memory) data bus width in bytes", ) # Currently we only support 1 local memory pipe parser.add_argument( @@ -166,7 +166,7 @@ parser.add_argument( "--wfs-per-simd", type=int, default=10, - help="Number of " "WF slots per SIMD", + help="Number of WF slots per SIMD", ) parser.add_argument( @@ -290,13 +290,11 @@ parser.add_argument( help="Latency for scalar responses from ruby to the cu.", ) -parser.add_argument( - "--TLB-prefetch", type=int, help="prefetch depth for" "TLBs" -) +parser.add_argument("--TLB-prefetch", type=int, 
help="prefetch depth for TLBs") parser.add_argument( "--pf-type", type=str, - help="type of prefetch: " "PF_CU, PF_WF, PF_PHASE, PF_STRIDE", + help="type of prefetch: PF_CU, PF_WF, PF_PHASE, PF_STRIDE", ) parser.add_argument("--pf-stride", type=int, help="set prefetch stride") parser.add_argument( @@ -369,7 +367,7 @@ parser.add_argument( type=str, default="gfx801", choices=GfxVersion.vals, - help="Gfx version for gpu" "Note: gfx902 is not fully supported by ROCm", + help="Gfx version for gpuNote: gfx902 is not fully supported by ROCm", ) Ruby.define_options(parser) diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py index 9eeba37ff7..a8db6bacd4 100644 --- a/configs/example/arm/baremetal.py +++ b/configs/example/arm/baremetal.py @@ -242,25 +242,25 @@ def main(): "--semi-stdin", type=str, default="stdin", - help="Standard input for semihosting " "(default: gem5's stdin)", + help="Standard input for semihosting (default: gem5's stdin)", ) parser.add_argument( "--semi-stdout", type=str, default="stdout", - help="Standard output for semihosting " "(default: gem5's stdout)", + help="Standard output for semihosting (default: gem5's stdout)", ) parser.add_argument( "--semi-stderr", type=str, default="stderr", - help="Standard error for semihosting " "(default: gem5's stderr)", + help="Standard error for semihosting (default: gem5's stderr)", ) parser.add_argument( "--semi-path", type=str, default="", - help=("Search path for files to be loaded through " "Arm Semihosting"), + help=("Search path for files to be loaded through Arm Semihosting"), ) parser.add_argument( "args", diff --git a/configs/example/arm/dist_bigLITTLE.py b/configs/example/arm/dist_bigLITTLE.py index a3f3ede4eb..2884a5efd5 100644 --- a/configs/example/arm/dist_bigLITTLE.py +++ b/configs/example/arm/dist_bigLITTLE.py @@ -51,7 +51,7 @@ import sw def addOptions(parser): # Options for distributed simulation (i.e. 
dist-gem5) parser.add_argument( - "--dist", action="store_true", help="Distributed gem5" " simulation." + "--dist", action="store_true", help="Distributed gem5 simulation." ) parser.add_argument( "--is-switch", @@ -71,14 +71,14 @@ def addOptions(parser): default=0, action="store", type=int, - help="Number of gem5 processes within the dist gem5" " run.", + help="Number of gem5 processes within the dist gem5 run.", ) parser.add_argument( "--dist-server-name", default="127.0.0.1", action="store", type=str, - help="Name of the message server host\nDEFAULT:" " localhost", + help="Name of the message server host\nDEFAULT: localhost", ) parser.add_argument( "--dist-server-port", diff --git a/configs/example/gpufs/amd/AmdGPUOptions.py b/configs/example/gpufs/amd/AmdGPUOptions.py index 531249ee84..3d6a8cc48e 100644 --- a/configs/example/gpufs/amd/AmdGPUOptions.py +++ b/configs/example/gpufs/amd/AmdGPUOptions.py @@ -49,7 +49,7 @@ def addAmdGPUOptions(parser): "--cu-per-sqc", type=int, default=4, - help="number of CUs sharing an SQC" " (icache, and thus icache TLB)", + help="number of CUs sharing an SQC (icache, and thus icache TLB)", ) parser.add_argument( "--cu-per-scalar-cache", @@ -102,19 +102,19 @@ def addAmdGPUOptions(parser): "--issue-period", type=int, default=4, - help="Number of cycles per vector instruction issue" " period", + help="Number of cycles per vector instruction issue period", ) parser.add_argument( "--glbmem-wr-bus-width", type=int, default=32, - help="VGPR to Coalescer (Global Memory) data bus width" " in bytes", + help="VGPR to Coalescer (Global Memory) data bus width in bytes", ) parser.add_argument( "--glbmem-rd-bus-width", type=int, default=32, - help="Coalescer to VGPR (Global Memory) data bus width" " in bytes", + help="Coalescer to VGPR (Global Memory) data bus width in bytes", ) # Currently we only support 1 local memory pipe parser.add_argument( @@ -204,20 +204,20 @@ def addAmdGPUOptions(parser): parser.add_argument( "--LocalMemBarrier", 
action="store_true", - help="Barrier does not wait for writethroughs to " " complete", + help="Barrier does not wait for writethroughs to complete", ) parser.add_argument( "--countPages", action="store_true", - help="Count Page Accesses and output in " " per-CU output files", + help="Count Page Accesses and output in per-CU output files", ) parser.add_argument( - "--TLB-prefetch", type=int, help="prefetch depth for" "TLBs" + "--TLB-prefetch", type=int, help="prefetch depth for TLBs" ) parser.add_argument( "--pf-type", type=str, - help="type of prefetch: " "PF_CU, PF_WF, PF_PHASE, PF_STRIDE", + help="type of prefetch: PF_CU, PF_WF, PF_PHASE, PF_STRIDE", ) parser.add_argument("--pf-stride", type=int, help="set prefetch stride") parser.add_argument( diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py index 781ce8e27c..86b91034b0 100644 --- a/configs/example/gpufs/runfs.py +++ b/configs/example/gpufs/runfs.py @@ -110,13 +110,13 @@ def addRunFSOptions(parser): action="store", type=str, default="16GB", - help="Specify the dGPU physical memory" " size", + help="Specify the dGPU physical memory size", ) parser.add_argument( "--dgpu-num-dirs", type=int, default=1, - help="Set " "the number of dGPU directories (memory controllers", + help="Set the number of dGPU directories (memory controllers", ) parser.add_argument( "--dgpu-mem-type", diff --git a/src/arch/arm/ArmFsWorkload.py b/src/arch/arm/ArmFsWorkload.py index d0dcde749a..a9474fe119 100644 --- a/src/arch/arm/ArmFsWorkload.py +++ b/src/arch/arm/ArmFsWorkload.py @@ -78,10 +78,10 @@ class ArmFsWorkload(KernelWorkload): ) panic_on_panic = Param.Bool( - False, "Trigger a gem5 panic if the " "guest kernel panics" + False, "Trigger a gem5 panic if the guest kernel panics" ) panic_on_oops = Param.Bool( - False, "Trigger a gem5 panic if the " "guest kernel oopses" + False, "Trigger a gem5 panic if the guest kernel oopses" ) diff --git a/src/arch/arm/ArmPMU.py b/src/arch/arm/ArmPMU.py index 
80288ded57..f21aaff634 100644 --- a/src/arch/arm/ArmPMU.py +++ b/src/arch/arm/ArmPMU.py @@ -88,7 +88,7 @@ class ArmPMU(SimObject): or isinstance(newObject, SoftwareIncrement) ): raise TypeError( - "argument must be of ProbeEvent or " "SoftwareIncrement type" + "argument must be of ProbeEvent or SoftwareIncrement type" ) if not self._events: diff --git a/src/arch/arm/fastmodel/CortexA76/FastModelCortexA76.py b/src/arch/arm/fastmodel/CortexA76/FastModelCortexA76.py index 577fd535d8..f690fb5097 100644 --- a/src/arch/arm/fastmodel/CortexA76/FastModelCortexA76.py +++ b/src/arch/arm/fastmodel/CortexA76/FastModelCortexA76.py @@ -50,7 +50,7 @@ class FastModelCortexA76(IrisBaseCPU): redistributor = Gicv3CommsTargetSocket("GIC communication target") core_reset = IntSinkPin( - "Raising this signal will put the core into " "reset mode." + "Raising this signal will put the core into reset mode." ) poweron_reset = IntSinkPin( "Power on reset. Initializes all the " diff --git a/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py b/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py index 1e267f028f..fe81e72bd0 100644 --- a/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py +++ b/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py @@ -49,7 +49,7 @@ class FastModelCortexR52(IrisBaseCPU): flash = AmbaInitiatorSocket(64, "Flash") amba = AmbaInitiatorSocket(64, "AMBA initiator socket") core_reset = IntSinkPin( - "Raising this signal will put the core into " "reset mode." + "Raising this signal will put the core into reset mode." ) poweron_reset = IntSinkPin( "Power on reset. 
Initializes all the " @@ -68,7 +68,7 @@ class FastModelCortexR52(IrisBaseCPU): CFGTE = Param.Bool(False, "Equivalent to CFGTHUMBEXCEPTIONS") RVBARADDR = Param.UInt32(0, "Equivalent to CFGVECTABLE") ase_present = Param.Bool( - True, "Set whether the model has been built " "with NEON support" + True, "Set whether the model has been built with NEON support" ) dcache_size = Param.UInt16(0x8000, "L1 D-Cache size in bytes") flash_enable = Param.Bool(False, "Equivalent to CFGFLASHEN") @@ -88,33 +88,33 @@ class FastModelCortexR52(IrisBaseCPU): "(0=off=default,1=syncState,2=postInsnIO,3=postInsnAll)", ) semihosting_A32_HLT = Param.UInt16( - 0xF000, "A32 HLT number for " "semihosting calls." + 0xF000, "A32 HLT number for semihosting calls." ) semihosting_ARM_SVC = Param.UInt32( - 0x123456, "A32 SVC number for " "semihosting calls." + 0x123456, "A32 SVC number for semihosting calls." ) semihosting_T32_HLT = Param.UInt8( - 60, "T32 HLT number for semihosting " "calls." + 60, "T32 HLT number for semihosting calls." ) semihosting_Thumb_SVC = Param.UInt8( - 171, "T32 SVC number for " "semihosting calls." + 171, "T32 SVC number for semihosting calls." ) semihosting_cmd_line = Param.String( - "", "Command line available to " "semihosting calls." + "", "Command line available to semihosting calls." ) semihosting_cwd = Param.String( - "", "Base directory for semihosting " "file access." + "", "Base directory for semihosting file access." ) semihosting_enable = Param.Bool(True, "Enable semihosting SVC/HLT traps.") semihosting_heap_base = Param.UInt32(0, "Virtual address of heap base.") semihosting_heap_limit = Param.UInt32( - 0xF000000, "Virtual address of " "top of heap." + 0xF000000, "Virtual address of top of heap." ) semihosting_stack_base = Param.UInt32( - 0x10000000, "Virtual address of " "base of descending stack." + 0x10000000, "Virtual address of base of descending stack." ) semihosting_stack_limit = Param.UInt32( - 0xF000000, "Virtual address of " "stack limit." 
+ 0xF000000, "Virtual address of stack limit." ) tcm_a_enable = Param.Bool(False, "Equivalent to CFGTCMBOOT") tcm_a_size = Param.UInt32(0x4000, "Sets the size of the ATCM(in bytes)") @@ -122,7 +122,7 @@ class FastModelCortexR52(IrisBaseCPU): tcm_c_size = Param.UInt32(0x2000, "Sets the size of the CTCM(in bytes)") vfp_dp_present = Param.Bool( True, - "Whether double-precision floating " "point feature is implemented", + "Whether double-precision floating point feature is implemented", ) vfp_enable_at_reset = Param.Bool( False, @@ -171,10 +171,10 @@ class FastModelCortexR52Cluster(SimObject): PERIPHBASE = Param.UInt32(0x13080000, "Equivalent to CFGPERIPHBASE") cluster_utid = Param.UInt8(0, "Equivalent to CFGCLUSTERUTID") cpi_div = Param.UInt32( - 1, "Divider for calculating CPI " "(Cycles Per Instruction)" + 1, "Divider for calculating CPI (Cycles Per Instruction)" ) cpi_mul = Param.UInt32( - 1, "Multiplier for calculating CPI " "(Cycles Per Instruction)" + 1, "Multiplier for calculating CPI (Cycles Per Instruction)" ) dcache_prefetch_enabled = Param.Bool( False, @@ -194,7 +194,7 @@ class FastModelCortexR52Cluster(SimObject): "upstream bus, this is only used when dcache-state_modelled=true.", ) dcache_state_modelled = Param.Bool( - False, "Set whether D-cache has " "stateful implementation" + False, "Set whether D-cache has stateful implementation" ) dcache_write_access_latency = Param.UInt64( 0, @@ -206,7 +206,7 @@ class FastModelCortexR52Cluster(SimObject): "is only used when dcache-state_modelled=true.", ) flash_protection_enable_at_reset = Param.Bool( - False, "Equivalent to " "CFGFLASHPROTEN" + False, "Equivalent to CFGFLASHPROTEN" ) has_flash_protection = Param.Bool(True, "Equivalent to CFGFLASHPROTIMP") icache_prefetch_enabled = Param.Bool( @@ -227,7 +227,7 @@ class FastModelCortexR52Cluster(SimObject): "upstream bus, this is only used when icache-state_modelled=true.", ) icache_state_modelled = Param.Bool( - False, "Set whether I-cache has " "stateful 
implementation" + False, "Set whether I-cache has stateful implementation" ) memory_ext_slave_base = Param.UInt32(0, "Equivalent to CFGAXISTCMBASEADDR") memory_flash_base = Param.UInt32(0, "Equivalent to CFGFLASHBASEADDR") @@ -237,16 +237,16 @@ class FastModelCortexR52Cluster(SimObject): "memory.flash_size = 0 => CFGFLASHIMP = false", ) num_protection_regions_s1 = Param.UInt8( - 16, "Number of v8-R stage1 " "protection regions" + 16, "Number of v8-R stage1 protection regions" ) num_protection_regions_s2 = Param.UInt8( - 16, "Number of v8-R hyp " "protection regions" + 16, "Number of v8-R hyp protection regions" ) num_spi = Param.UInt16( - 960, "Number of interrupts (SPI) into the " "internal GIC controller" + 960, "Number of interrupts (SPI) into the internal GIC controller" ) ram_protection_enable_at_reset = Param.Bool( - False, "Equivalent to " "CFGRAMPROTEN" + False, "Equivalent to CFGRAMPROTEN" ) has_export_m_port = Param.Bool( True, diff --git a/src/arch/arm/fastmodel/GIC/FastModelGIC.py b/src/arch/arm/fastmodel/GIC/FastModelGIC.py index 1ad5a979cd..ce0a8c5fb4 100644 --- a/src/arch/arm/fastmodel/GIC/FastModelGIC.py +++ b/src/arch/arm/fastmodel/GIC/FastModelGIC.py @@ -90,10 +90,10 @@ class SCFastModelGIC(SystemC_ScModule): vPEID_bits = Param.Unsigned(16, "Number of bits of vPEID with GICv4.1.") print_mmap = Param.Bool(False, "Print memory map to stdout") monolithic = Param.Bool( - False, "Indicate that the implementation is not " "distributed" + False, "Indicate that the implementation is not distributed" ) direct_lpi_support = Param.Bool( - False, "Enable support for LPI " "operations through GICR registers" + False, "Enable support for LPI operations through GICR registers" ) cpu_affinities = Param.String( "", @@ -124,42 +124,42 @@ class SCFastModelGIC(SystemC_ScModule): "SPI signalling register aliases(0:Disabled)", ) has_two_security_states = Param.Bool( - True, "If true, has two security " "states" + True, "If true, has two security states" ) 
DS_fixed_to_zero = Param.Bool( - False, "Enable/disable support of single " "security state" + False, "Enable/disable support of single security state" ) IIDR = Param.UInt32(0x0, "GICD_IIDR and GICR_IIDR value") gicv2_only = Param.Bool( False, - "If true, when using the GICv3 model, " "pretend to be a GICv2 system", + "If true, when using the GICv3 model, pretend to be a GICv2 system", ) STATUSR_implemented = Param.Bool( - True, "Determines whether the " "GICR_STATUSR register is implemented." + True, "Determines whether the GICR_STATUSR register is implemented." ) priority_bits_implemented = Param.Unsigned( - 5, "Number of implemented " "priority bits" + 5, "Number of implemented priority bits" ) itargets_razwi = Param.Bool( - False, "If true, the GICD_ITARGETS registers " "are RAZ/WI" + False, "If true, the GICD_ITARGETS registers are RAZ/WI" ) icfgr_sgi_mask = Param.UInt32( - 0x0, "Mask for writes to ICFGR registers " "that configure SGIs" + 0x0, "Mask for writes to ICFGR registers that configure SGIs" ) icfgr_ppi_mask = Param.UInt32( - 0xAAAAAAAA, "Mask for writes to ICFGR " "registers that configure PPIs" + 0xAAAAAAAA, "Mask for writes to ICFGR registers that configure PPIs" ) icfgr_spi_mask = Param.UInt32( - 0xAAAAAAAA, "Mask for writes to ICFGR " "registers that configure SPIs" + 0xAAAAAAAA, "Mask for writes to ICFGR registers that configure SPIs" ) icfgr_sgi_reset = Param.UInt32( - 0xAAAAAAAA, "Reset value for ICFGR " "registers that configure SGIs" + 0xAAAAAAAA, "Reset value for ICFGR registers that configure SGIs" ) icfgr_ppi_reset = Param.UInt32( - 0x0, "Reset value for ICFGR regesters " "that configure PPIs" + 0x0, "Reset value for ICFGR regesters that configure PPIs" ) icfgr_spi_reset = Param.UInt32( - 0x0, "Reset value for ICFGR regesters " "that configure SPIs" + 0x0, "Reset value for ICFGR regesters that configure SPIs" ) icfgr_ppi_rsvd_bit = Param.Bool( False, @@ -167,16 +167,16 @@ class SCFastModelGIC(SystemC_ScModule): "bits i.e. 
bit 0,2,4..30 of ICFGRn for n>0", ) igroup_sgi_mask = Param.UInt16( - 0xFFFF, "Mask for writes to SGI bits in " "IGROUP registers" + 0xFFFF, "Mask for writes to SGI bits in IGROUP registers" ) igroup_ppi_mask = Param.UInt16( - 0xFFFF, "Mask for writes to PPI bits in " "IGROUP registers" + 0xFFFF, "Mask for writes to PPI bits in IGROUP registers" ) igroup_sgi_reset = Param.UInt16( - 0x0, "Reset value for SGI bits in IGROUP " "registers" + 0x0, "Reset value for SGI bits in IGROUP registers" ) igroup_ppi_reset = Param.UInt16( - 0x0, "Reset value for SGI bits in IGROUP " "registers" + 0x0, "Reset value for SGI bits in IGROUP registers" ) ppi_implemented_mask = Param.UInt16( 0xFFFF, @@ -220,16 +220,16 @@ class SCFastModelGIC(SystemC_ScModule): "to be instantiated (0=none)", ) its0_base = Param.Addr( - 0, "Register base address for ITS0 " "(automatic if 0)." + 0, "Register base address for ITS0 (automatic if 0)." ) its1_base = Param.Addr( - 0, "Register base address for ITS1 " "(automatic if 0)." + 0, "Register base address for ITS1 (automatic if 0)." ) its2_base = Param.Addr( - 0, "Register base address for ITS2 " "(automatic if 0)." + 0, "Register base address for ITS2 (automatic if 0)." ) its3_base = Param.Addr( - 0, "Register base address for ITS3 " "(automatic if 0)." + 0, "Register base address for ITS3 (automatic if 0)." ) gits_pidr = Param.UInt64( 0x0, @@ -286,52 +286,52 @@ class SCFastModelGIC(SystemC_ScModule): "2 = Virtual Processors; 3 = Physical Processors; 4 = Collections", ) gits_baser0_entry_bytes = Param.Unsigned( - 8, "Number of bytes required per " "entry for GITS_BASER0 register." + 8, "Number of bytes required per entry for GITS_BASER0 register." ) gits_baser1_entry_bytes = Param.Unsigned( - 8, "Number of bytes required per " "entry for GITS_BASER1 register." + 8, "Number of bytes required per entry for GITS_BASER1 register." ) gits_baser2_entry_bytes = Param.Unsigned( - 8, "Number of bytes required per " "entry for GITS_BASER2 register." 
+ 8, "Number of bytes required per entry for GITS_BASER2 register." ) gits_baser3_entry_bytes = Param.Unsigned( - 8, "Number of bytes required per " "entry for GITS_BASER3 register." + 8, "Number of bytes required per entry for GITS_BASER3 register." ) gits_baser4_entry_bytes = Param.Unsigned( - 8, "Number of bytes required per " "entry for GITS_BASER4 register." + 8, "Number of bytes required per entry for GITS_BASER4 register." ) gits_baser5_entry_bytes = Param.Unsigned( - 8, "Number of bytes required per " "entry for GITS_BASER5 register." + 8, "Number of bytes required per entry for GITS_BASER5 register." ) gits_baser6_entry_bytes = Param.Unsigned( - 8, "Number of bytes required per " "entry for GITS_BASER6 register." + 8, "Number of bytes required per entry for GITS_BASER6 register." ) gits_baser7_entry_bytes = Param.Unsigned( - 8, "Number of bytes required per " "entry for GITS_BASER7 register." + 8, "Number of bytes required per entry for GITS_BASER7 register." ) gits_baser0_indirect_raz = Param.Bool( - False, "Indirect field for " "GITS_BASER0 register is RAZ/WI." + False, "Indirect field for GITS_BASER0 register is RAZ/WI." ) gits_baser1_indirect_raz = Param.Bool( - False, "Indirect field for " "GITS_BASER1 register is RAZ/WI." + False, "Indirect field for GITS_BASER1 register is RAZ/WI." ) gits_baser2_indirect_raz = Param.Bool( - False, "Indirect field for " "GITS_BASER2 register is RAZ/WI." + False, "Indirect field for GITS_BASER2 register is RAZ/WI." ) gits_baser3_indirect_raz = Param.Bool( - False, "Indirect field for " "GITS_BASER3 register is RAZ/WI." + False, "Indirect field for GITS_BASER3 register is RAZ/WI." ) gits_baser4_indirect_raz = Param.Bool( - False, "Indirect field for " "GITS_BASER4 register is RAZ/WI." + False, "Indirect field for GITS_BASER4 register is RAZ/WI." ) gits_baser5_indirect_raz = Param.Bool( - False, "Indirect field for " "GITS_BASER5 register is RAZ/WI." + False, "Indirect field for GITS_BASER5 register is RAZ/WI." 
) gits_baser6_indirect_raz = Param.Bool( - False, "Indirect field for " "GITS_BASER6 register is RAZ/WI." + False, "Indirect field for GITS_BASER6 register is RAZ/WI." ) gits_baser7_indirect_raz = Param.Bool( - False, "Indirect field for " "GITS_BASER7 register is RAZ/WI." + False, "Indirect field for GITS_BASER7 register is RAZ/WI." ) its_baser_force_page_alignement = Param.Bool( True, @@ -352,25 +352,25 @@ class SCFastModelGIC(SystemC_ScModule): "port rather than an AXI4 port).", ) a3_affinity_supported = Param.Bool( - False, "Device supports affinity " "level 3 values that are non-zero." + False, "Device supports affinity level 3 values that are non-zero." ) SGI_RSS_support = Param.Bool( - False, "Device has support for the Range " "Selector feature for SGI" + False, "Device has support for the Range Selector feature for SGI" ) gicr_propbaser_read_only = Param.Bool( - False, "GICR_PROPBASER register is " "read-only." + False, "GICR_PROPBASER register is read-only." ) gicr_propbaser_reset = Param.UInt64( - 0x0, "Value of GICR_PROPBASER on " "reset." + 0x0, "Value of GICR_PROPBASER on reset." ) its_device_bits = Param.Unsigned( - 16, "Number of bits supported for ITS " "device IDs." + 16, "Number of bits supported for ITS device IDs." ) its_entry_size = Param.Unsigned( - 8, "Number of bytes required to store " "each entry in the ITT tables." + 8, "Number of bytes required to store each entry in the ITT tables." ) its_id_bits = Param.Unsigned( - 16, "Number of interrupt bits supported by " "ITS." + 16, "Number of interrupt bits supported by ITS." ) its_collection_id_bits = Param.Unsigned( 0, @@ -386,7 +386,7 @@ class SCFastModelGIC(SystemC_ScModule): "the number supported in memory only. Irrelevant when HCC=0", ) delay_ITS_accesses = Param.Bool( - True, "Delay accesses from the ITS until " "GICR_SYNCR is read." + True, "Delay accesses from the ITS until GICR_SYNCR is read." 
) local_SEIs = Param.Bool(False, "Generate SEI to signal internal issues") local_VSEIs = Param.Bool(False, "Generate VSEI to signal internal issues") @@ -397,10 +397,10 @@ class SCFastModelGIC(SystemC_ScModule): "for distributed implementations", ) ITS_hardware_collection_count = Param.Unsigned( - 0, "Number of hardware " "collections held exclusively in the ITS" + 0, "Number of hardware collections held exclusively in the ITS" ) ITS_MOVALL_update_collections = Param.Bool( - False, "Whether MOVALL command " "updates the collection entires" + False, "Whether MOVALL command updates the collection entires" ) ITS_TRANSLATE64R = Param.Bool( False, @@ -409,7 +409,7 @@ class SCFastModelGIC(SystemC_ScModule): "interupt[31:0])", ) enable_protocol_checking = Param.Bool( - False, "Enable/disable protocol " "checking at cpu interface" + False, "Enable/disable protocol checking at cpu interface" ) fixed_routed_spis = Param.String( "", @@ -470,13 +470,13 @@ class SCFastModelGIC(SystemC_ScModule): ) virtual_lpi_support = Param.Bool( False, - "GICv4 Virtual LPIs and Direct " "injection of Virtual LPIs supported", + "GICv4 Virtual LPIs and Direct injection of Virtual LPIs supported", ) virtual_priority_bits = Param.Unsigned( - 5, "Number of implemented virtual " "priority bits" + 5, "Number of implemented virtual priority bits" ) LPI_cache_type = Param.Unsigned( - 1, "Cache type for LPIs, 0:No caching, " "1:Full caching" + 1, "Cache type for LPIs, 0:No caching, 1:Full caching" ) LPI_cache_check_data = Param.Bool( False, @@ -490,7 +490,7 @@ class SCFastModelGIC(SystemC_ScModule): ) DPG_ARE_only = Param.Bool( False, - "Limit application of DPG bits to " "interrupt groups for which ARE=1", + "Limit application of DPG bits to interrupt groups for which ARE=1", ) ARE_fixed_to_one = Param.Bool( False, @@ -498,7 +498,7 @@ class SCFastModelGIC(SystemC_ScModule): "supported and GICD_CTLR.ARE_* is always one", ) legacy_sgi_enable_rao = Param.Bool( - False, "Enables for SGI associated " "with 
an ARE=0 regime are RAO/WI" + False, "Enables for SGI associated with an ARE=0 regime are RAO/WI" ) pa_size = Param.Unsigned(48, "Number of valid bits in physical address") MSI_IIDR = Param.UInt32(0x0, "Value returned in MSI_IIDR registers.") @@ -763,7 +763,7 @@ class SCFastModelGIC(SystemC_ScModule): "the core fast model.", ) SPI_MBIS = Param.Bool( - True, "Distributor supports meassage based " "signaling of SPI" + True, "Distributor supports meassage based signaling of SPI" ) SPI_unimplemented = Param.String( "", @@ -772,7 +772,7 @@ class SCFastModelGIC(SystemC_ScModule): "'35, 39-42, 73)'", ) irm_razwi = Param.Bool( - False, "GICD_IROUTERn.InterruptRoutingMode is " "RAZ/WI" + False, "GICD_IROUTERn.InterruptRoutingMode is RAZ/WI" ) common_LPI_configuration = Param.Unsigned( 0, @@ -810,7 +810,7 @@ class SCFastModelGIC(SystemC_ScModule): "transient loading state when valid=1", ) allow_LPIEN_clear = Param.Bool( - False, "Allow RW behaviour on " "GICR_CTLR.LPIEN isntead of set once" + False, "Allow RW behaviour on GICR_CTLR.LPIEN isntead of set once" ) GICD_legacy_reg_reserved = Param.Bool( False, diff --git a/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py b/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py index d6c515cf44..ad43fed237 100644 --- a/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py +++ b/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py @@ -87,7 +87,7 @@ class FastModelPL330(SystemC_ScModule): cache_lines = Param.UInt32(1, "number of cache lines") max_channels = Param.UInt32(8, "virtual channels") controller_nsecure = Param.Bool( - False, "Controller non-secure at reset " "(boot_manager_ns)" + False, "Controller non-secure at reset (boot_manager_ns)" ) irq_nsecure = Param.UInt32(0, "Interrupts non-secure at reset") periph_nsecure = Param.Bool(False, "Peripherals non-secure at reset") diff --git a/src/arch/isa_parser/isa_parser.py b/src/arch/isa_parser/isa_parser.py index 62f33828a1..aff3c9f63c 100755 --- a/src/arch/isa_parser/isa_parser.py 
+++ b/src/arch/isa_parser/isa_parser.py @@ -133,7 +133,7 @@ class Template(object): """ pcstate_decl = ( - f"{self.parser.namespace}::PCState " "__parserAutoPCState;\n" + f"{self.parser.namespace}::PCState __parserAutoPCState;\n" ) myDict["op_decl"] = operands.concatAttrStrings("op_decl") if operands.readPC or operands.setPC: @@ -1098,7 +1098,7 @@ del wrap # 'def [signed] bitfield [:]' # This generates a preprocessor macro in the output file. def p_def_bitfield_0(self, t): - "def_bitfield : DEF opt_signed " "BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI" + "def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI" expr = "bits(machInst, %2d, %2d)" % (t[6], t[8]) if t[2] == "signed": expr = "sext<%d>(%s)" % (t[6] - t[8] + 1, expr) diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py index 00374d8e54..438d4f45df 100644 --- a/src/cpu/BaseCPU.py +++ b/src/cpu/BaseCPU.py @@ -228,7 +228,7 @@ class BaseCPU(ClockedObject): else: if len(self.isa) != int(self.numThreads): raise RuntimeError( - "Number of ISA instances doesn't " "match thread count" + "Number of ISA instances doesn't match thread count" ) if len(self.decoder) != 0: raise RuntimeError("Decoders should not be set up manually") diff --git a/src/cpu/minor/BaseMinorCPU.py b/src/cpu/minor/BaseMinorCPU.py index 7d15ec4cd4..bcdab1bad5 100644 --- a/src/cpu/minor/BaseMinorCPU.py +++ b/src/cpu/minor/BaseMinorCPU.py @@ -67,7 +67,7 @@ class MinorOpClassSet(SimObject): cxx_class = "gem5::MinorOpClassSet" opClasses = VectorParam.MinorOpClass( - [], "op classes to be matched." " An empty list means any class" + [], "op classes to be matched. An empty list means any class" ) @@ -83,13 +83,13 @@ class MinorFUTiming(SimObject): " (ext_mach_inst & mask) == match", ) suppress = Param.Bool( - False, "if true, this inst. is not executed by" " this FU" + False, "if true, this inst. is not executed by this FU" ) extraCommitLat = Param.Cycles( - 0, "extra cycles to stall commit for" " this inst." 
+ 0, "extra cycles to stall commit for this inst." ) extraCommitLatExpr = Param.TimingExpr( - NULL, "extra cycles as a" " run-time evaluated expression" + NULL, "extra cycles as a run-time evaluated expression" ) extraAssumedLat = Param.Cycles( 0, @@ -109,7 +109,7 @@ class MinorFUTiming(SimObject): " class", ) description = Param.String( - "", "description string of the decoding/inst." " class" + "", "description string of the decoding/inst class" ) @@ -129,11 +129,11 @@ class MinorFU(SimObject): opClasses = Param.MinorOpClassSet( MinorOpClassSet(), - "type of operations" " allowed on this functional unit", + "type of operations allowed on this functional unit", ) opLat = Param.Cycles(1, "latency in cycles") issueLat = Param.Cycles( - 1, "cycles until another instruction can be" " issued" + 1, "cycles until another instruction can be issued" ) timings = VectorParam.MinorFUTiming([], "extra decoding rules") diff --git a/src/cpu/o3/BaseO3CPU.py b/src/cpu/o3/BaseO3CPU.py index 79bd884b87..07d9df6b7f 100644 --- a/src/cpu/o3/BaseO3CPU.py +++ b/src/cpu/o3/BaseO3CPU.py @@ -79,47 +79,43 @@ class BaseO3CPU(BaseCPU): activity = Param.Unsigned(0, "Initial count") cacheStorePorts = Param.Unsigned( - 200, "Cache Ports. " "Constrains stores only." - ) - cacheLoadPorts = Param.Unsigned( - 200, "Cache Ports. " "Constrains loads only." + 200, "Cache Ports. Constrains stores only." ) + cacheLoadPorts = Param.Unsigned(200, "Cache Ports. 
Constrains loads only.") decodeToFetchDelay = Param.Cycles(1, "Decode to fetch delay") renameToFetchDelay = Param.Cycles(1, "Rename to fetch delay") - iewToFetchDelay = Param.Cycles( - 1, "Issue/Execute/Writeback to fetch " "delay" - ) + iewToFetchDelay = Param.Cycles(1, "Issue/Execute/Writeback to fetch delay") commitToFetchDelay = Param.Cycles(1, "Commit to fetch delay") fetchWidth = Param.Unsigned(8, "Fetch width") fetchBufferSize = Param.Unsigned(64, "Fetch buffer size in bytes") fetchQueueSize = Param.Unsigned( - 32, "Fetch queue size in micro-ops " "per-thread" + 32, "Fetch queue size in micro-ops per-thread" ) renameToDecodeDelay = Param.Cycles(1, "Rename to decode delay") iewToDecodeDelay = Param.Cycles( - 1, "Issue/Execute/Writeback to decode " "delay" + 1, "Issue/Execute/Writeback to decode delay" ) commitToDecodeDelay = Param.Cycles(1, "Commit to decode delay") fetchToDecodeDelay = Param.Cycles(1, "Fetch to decode delay") decodeWidth = Param.Unsigned(8, "Decode width") iewToRenameDelay = Param.Cycles( - 1, "Issue/Execute/Writeback to rename " "delay" + 1, "Issue/Execute/Writeback to rename delay" ) commitToRenameDelay = Param.Cycles(1, "Commit to rename delay") decodeToRenameDelay = Param.Cycles(1, "Decode to rename delay") renameWidth = Param.Unsigned(8, "Rename width") commitToIEWDelay = Param.Cycles( - 1, "Commit to " "Issue/Execute/Writeback delay" + 1, "Commit to Issue/Execute/Writeback delay" ) renameToIEWDelay = Param.Cycles( - 2, "Rename to " "Issue/Execute/Writeback delay" + 2, "Rename to Issue/Execute/Writeback delay" ) issueToExecuteDelay = Param.Cycles( - 1, "Issue to execute delay (internal " "to the IEW stage)" + 1, "Issue to execute delay (internal to the IEW stage)" ) dispatchWidth = Param.Unsigned(8, "Dispatch width") issueWidth = Param.Unsigned(8, "Issue width") @@ -127,7 +123,7 @@ class BaseO3CPU(BaseCPU): fuPool = Param.FUPool(DefaultFUPool(), "Functional Unit pool") iewToCommitDelay = Param.Cycles( - 1, "Issue/Execute/Writeback to 
commit " "delay" + 1, "Issue/Execute/Writeback to commit delay" ) renameToROBDelay = Param.Cycles(1, "Rename to reorder buffer delay") commitWidth = Param.Unsigned(8, "Commit width") @@ -166,13 +162,11 @@ class BaseO3CPU(BaseCPU): 256, "Number of physical integer registers" ) numPhysFloatRegs = Param.Unsigned( - 256, "Number of physical floating point " "registers" - ) - numPhysVecRegs = Param.Unsigned( - 256, "Number of physical vector " "registers" + 256, "Number of physical floating point registers" ) + numPhysVecRegs = Param.Unsigned(256, "Number of physical vector registers") numPhysVecPredRegs = Param.Unsigned( - 32, "Number of physical predicate " "registers" + 32, "Number of physical predicate registers" ) # most ISAs don't use condition-code regs, so default is 0 numPhysCCRegs = Param.Unsigned(0, "Number of physical cc registers") diff --git a/src/cpu/o3/probe/ElasticTrace.py b/src/cpu/o3/probe/ElasticTrace.py index ca4fa4ec46..73e632f647 100644 --- a/src/cpu/o3/probe/ElasticTrace.py +++ b/src/cpu/o3/probe/ElasticTrace.py @@ -44,10 +44,10 @@ class ElasticTrace(ProbeListenerObject): # Trace files for the following params are created in the output directory. # User is forced to provide these when an instance of this class is created. instFetchTraceFile = Param.String( - desc="Protobuf trace file name for " "instruction fetch tracing" + desc="Protobuf trace file name for instruction fetch tracing" ) dataDepTraceFile = Param.String( - desc="Protobuf trace file name for " "data dependency tracing" + desc="Protobuf trace file name for data dependency tracing" ) # The dependency window size param must be equal to or greater than the # number of entries in the O3CPU ROB, a typical value is 3 times ROB size @@ -66,5 +66,5 @@ class ElasticTrace(ProbeListenerObject): ) # Whether to trace virtual addresses for memory accesses traceVirtAddr = Param.Bool( - False, "Set to true if virtual addresses are " "to be traced." 
+ False, "Set to true if virtual addresses are to be traced." ) diff --git a/src/cpu/testers/memtest/MemTest.py b/src/cpu/testers/memtest/MemTest.py index e8492b5402..24bd974804 100644 --- a/src/cpu/testers/memtest/MemTest.py +++ b/src/cpu/testers/memtest/MemTest.py @@ -70,7 +70,7 @@ class MemTest(ClockedObject): 1000000, "Progress report interval (in accesses)" ) progress_check = Param.Cycles( - 5000000, "Cycles before exiting " "due to lack of progress" + 5000000, "Cycles before exiting due to lack of progress" ) port = RequestPort("Port to the memory system") @@ -79,5 +79,5 @@ class MemTest(ClockedObject): # Add the ability to supress error responses on functional # accesses as Ruby needs this suppress_func_errors = Param.Bool( - False, "Suppress panic when " "functional accesses fail." + False, "Suppress panic when functional accesses fail." ) diff --git a/src/cpu/testers/rubytest/RubyTester.py b/src/cpu/testers/rubytest/RubyTester.py index 3fabece8c4..a90cfe1f82 100644 --- a/src/cpu/testers/rubytest/RubyTester.py +++ b/src/cpu/testers/rubytest/RubyTester.py @@ -38,7 +38,7 @@ class RubyTester(ClockedObject): num_cpus = Param.Int("number of cpus / RubyPorts") cpuInstDataPort = VectorRequestPort( - "cpu combo ports to inst & " "data caches" + "cpu combo ports to inst & data caches" ) cpuInstPort = VectorRequestPort("cpu ports to only inst caches") cpuDataPort = VectorRequestPort("cpu ports to only data caches") diff --git a/src/cpu/testers/traffic_gen/BaseTrafficGen.py b/src/cpu/testers/traffic_gen/BaseTrafficGen.py index b8de198f9a..0d9146756d 100644 --- a/src/cpu/testers/traffic_gen/BaseTrafficGen.py +++ b/src/cpu/testers/traffic_gen/BaseTrafficGen.py @@ -82,7 +82,7 @@ class BaseTrafficGen(ClockedObject): # progress for a long period of time. The default value is # somewhat arbitrary and may well have to be tuned. 
progress_check = Param.Latency( - "1ms", "Time before exiting " "due to lack of progress" + "1ms", "Time before exiting due to lack of progress" ) # Generator type used for applying Stream and/or Substream IDs to requests diff --git a/src/cpu/testers/traffic_gen/GUPSGen.py b/src/cpu/testers/traffic_gen/GUPSGen.py index 31b5ed3e10..6b8b3f72df 100644 --- a/src/cpu/testers/traffic_gen/GUPSGen.py +++ b/src/cpu/testers/traffic_gen/GUPSGen.py @@ -58,11 +58,11 @@ class GUPSGen(ClockedObject): ) update_limit = Param.Int( - 0, "The number of updates to issue before the" " simulation is over" + 0, "The number of updates to issue before the simulation is over" ) request_queue_size = Param.Int( - 1024, "Maximum number of parallel" " outstanding requests" + 1024, "Maximum number of parallel outstanding requests" ) init_memory = Param.Bool( diff --git a/src/cpu/trace/TraceCPU.py b/src/cpu/trace/TraceCPU.py index e2dc1db6c5..1be16518d7 100644 --- a/src/cpu/trace/TraceCPU.py +++ b/src/cpu/trace/TraceCPU.py @@ -64,7 +64,7 @@ class TraceCPU(BaseCPU): instTraceFile = Param.String("", "Instruction trace file") dataTraceFile = Param.String("", "Data dependency trace file") sizeStoreBuffer = Param.Unsigned( - 16, "Number of entries in the store " "buffer" + 16, "Number of entries in the store buffer" ) sizeLoadBuffer = Param.Unsigned(16, "Number of entries in the load buffer") sizeROB = Param.Unsigned(40, "Number of entries in the re-order buffer") @@ -74,13 +74,13 @@ class TraceCPU(BaseCPU): # changed when frequency is scaled. A default value of 1.0 means the same # frequency as was used for generating the traces. 
freqMultiplier = Param.Float( - 1.0, "Multiplier scale the Trace CPU " "frequency up or down" + 1.0, "Multiplier scale the Trace CPU frequency up or down" ) # Enable exiting when any one Trace CPU completes execution which is set to # false by default enableEarlyExit = Param.Bool( - False, "Exit when any one Trace CPU " "completes execution" + False, "Exit when any one Trace CPU completes execution" ) # If progress msg interval is set to a non-zero value, it is treated as diff --git a/src/dev/amdgpu/AMDGPU.py b/src/dev/amdgpu/AMDGPU.py index c834d3be92..f9d953fc57 100644 --- a/src/dev/amdgpu/AMDGPU.py +++ b/src/dev/amdgpu/AMDGPU.py @@ -73,7 +73,7 @@ class AMDGPUDevice(PciDevice): rom_binary = Param.String("ROM binary dumped from hardware") trace_file = Param.String("MMIO trace collected on hardware") checkpoint_before_mmios = Param.Bool( - False, "Take a checkpoint before the" " device begins sending MMIOs" + False, "Take a checkpoint before the device begins sending MMIOs" ) # Specific to Vega10: Vega10 has two SDMA engines these do not have any diff --git a/src/dev/arm/RealView.py b/src/dev/arm/RealView.py index 41e1bcfbb4..0009842771 100644 --- a/src/dev/arm/RealView.py +++ b/src/dev/arm/RealView.py @@ -117,7 +117,7 @@ class AmbaDmaDevice(DmaDevice): pio_addr = Param.Addr("Address for AMBA responder interface") pio_latency = Param.Latency( "10ns", - "Time between action and write/read" "result by AMBA DMA Device", + "Time between action and write/readresult by AMBA DMA Device", ) interrupt = Param.ArmInterruptPin("Interrupt that connects to GIC") amba_id = Param.UInt32("ID of AMBA device for kernel detection") @@ -634,17 +634,17 @@ class HDLcd(AmbaDmaDevice): cxx_header = "dev/arm/hdlcd.hh" cxx_class = "gem5::HDLcd" vnc = Param.VncInput( - Parent.any, "Vnc server for remote frame buffer " "display" + Parent.any, "Vnc server for remote frame buffer display" ) amba_id = 0x00141000 workaround_swap_rb = Param.Bool( - False, "Workaround incorrect color " "selector 
order in some kernels" + False, "Workaround incorrect color selector order in some kernels" ) workaround_dma_line_count = Param.Bool( - True, "Workaround incorrect " "DMA line count (off by 1)" + True, "Workaround incorrect DMA line count (off by 1)" ) enable_capture = Param.Bool( - True, "capture frame to " "system.framebuffer.{extension}" + True, "capture frame to system.framebuffer.{extension}" ) frame_format = Param.ImageFormat( "Auto", "image format of the captured frame" @@ -655,7 +655,7 @@ class HDLcd(AmbaDmaDevice): pxl_clk = Param.ClockDomain("Pixel clock source") pixel_chunk = Param.Unsigned(32, "Number of pixels to handle in one batch") virt_refresh_rate = Param.Frequency( - "20Hz", "Frame refresh rate " "in KVM mode" + "20Hz", "Frame refresh rate in KVM mode" ) _status = "ok" diff --git a/src/dev/arm/SMMUv3.py b/src/dev/arm/SMMUv3.py index 415eccd742..a1992ecd63 100644 --- a/src/dev/arm/SMMUv3.py +++ b/src/dev/arm/SMMUv3.py @@ -48,13 +48,13 @@ class SMMUv3DeviceInterface(ClockedObject): device_port = ResponsePort("Device port") slave = DeprecatedParam(device_port, "`slave` is now called `device_port`") ats_mem_side_port = RequestPort( - "ATS mem side port," "sends requests and receives responses" + "ATS mem side port,sends requests and receives responses" ) ats_master = DeprecatedParam( ats_mem_side_port, "`ats_master` is now called `ats_mem_side_port`" ) ats_dev_side_port = ResponsePort( - "ATS dev_side_port," "sends responses and receives requests" + "ATS dev_side_port,sends responses and receives requests" ) ats_slave = DeprecatedParam( ats_dev_side_port, "`ats_slave` is now called `ats_dev_side_port`" diff --git a/src/dev/net/Ethernet.py b/src/dev/net/Ethernet.py index 97da54c118..608f25b617 100644 --- a/src/dev/net/Ethernet.py +++ b/src/dev/net/Ethernet.py @@ -107,7 +107,7 @@ class EtherSwitch(SimObject): dump = Param.EtherDump(NULL, "dump object") fabric_speed = Param.NetworkBandwidth( - "10Gbps", "switch fabric speed in " "bits per second" + 
"10Gbps", "switch fabric speed in bits per second" ) interface = VectorEtherInt("Ethernet Interface") output_buffer_size = Param.MemorySize( diff --git a/src/dev/pci/PciHost.py b/src/dev/pci/PciHost.py index ef8a5ab1f5..007b17a30c 100644 --- a/src/dev/pci/PciHost.py +++ b/src/dev/pci/PciHost.py @@ -57,7 +57,7 @@ class GenericPciHost(PciHost): conf_base = Param.Addr("Config space base address") conf_size = Param.Addr("Config space base address") conf_device_bits = Param.UInt8( - 8, "Number of bits used to as an " "offset a devices address space" + 8, "Number of bits used to as an offset a devices address space" ) pci_pio_base = Param.Addr(0, "Base address for PCI IO accesses") diff --git a/src/dev/serial/Uart.py b/src/dev/serial/Uart.py index f3348d6775..2ca68b8f12 100644 --- a/src/dev/serial/Uart.py +++ b/src/dev/serial/Uart.py @@ -61,7 +61,7 @@ class SimpleUart(Uart): byte_order = Param.ByteOrder("little", "Device byte order") pio_size = Param.Addr(0x4, "Size of address range") end_on_eot = Param.Bool( - False, "End the simulation when a EOT is " "received on the UART" + False, "End the simulation when a EOT is received on the UART" ) diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py index 0fdc0b75a7..3a87186a30 100644 --- a/src/gpu-compute/GPU.py +++ b/src/gpu-compute/GPU.py @@ -115,7 +115,7 @@ class Wavefront(SimObject): wf_size = Param.Int(64, "Wavefront size (in work items)") max_ib_size = Param.Int( 13, - "Maximum size (in number of insts) of the " "instruction buffer (IB).", + "Maximum size (in number of insts) of the instruction buffer (IB).", ) @@ -134,32 +134,32 @@ class ComputeUnit(ClockedObject): num_SIMDs = Param.Int(4, "number of SIMD units per CU") num_scalar_cores = Param.Int(1, "number of Scalar cores per CU") num_scalar_mem_pipes = Param.Int( - 1, "number of Scalar memory pipelines " "per CU" + 1, "number of Scalar memory pipelines per CU" ) simd_width = Param.Int(16, "width (number of lanes) per SIMD unit") operand_network_length = 
Param.Int( - 1, "number of pipe stages of operand " "network" + 1, "number of pipe stages of operand network" ) spbypass_pipe_length = Param.Int( - 4, "vector ALU Single Precision bypass " "latency" + 4, "vector ALU Single Precision bypass latency" ) dpbypass_pipe_length = Param.Int( - 4, "vector ALU Double Precision bypass " "latency" + 4, "vector ALU Double Precision bypass latency" ) scalar_pipe_length = Param.Int(1, "number of pipe stages per scalar ALU") issue_period = Param.Int(4, "number of cycles per issue period") vrf_gm_bus_latency = Param.Int( - 1, "number of cycles per use of VRF to " "GM bus" + 1, "number of cycles per use of VRF to GM bus" ) srf_scm_bus_latency = Param.Int( - 1, "number of cycles per use of SRF " "to Scalar Mem bus" + 1, "number of cycles per use of SRF to Scalar Mem bus" ) vrf_lm_bus_latency = Param.Int( - 1, "number of cycles per use of VRF to " "LM bus" + 1, "number of cycles per use of VRF to LM bus" ) num_global_mem_pipes = Param.Int(1, "number of global memory pipes per CU") @@ -194,10 +194,10 @@ class ComputeUnit(ClockedObject): system = Param.System(Parent.any, "system object") cu_id = Param.Int("CU id") vrf_to_coalescer_bus_width = Param.Int( - 64, "VRF->Coalescer data bus " "width in bytes" + 64, "VRF->Coalescer data bus width in bytes" ) coalescer_to_vrf_bus_width = Param.Int( - 64, "Coalescer->VRF data bus " "width in bytes" + 64, "Coalescer->VRF data bus width in bytes" ) memory_port = VectorRequestPort("Port to the memory system") @@ -211,7 +211,7 @@ class ComputeUnit(ClockedObject): perLaneTLB = Param.Bool(False, "enable per-lane TLB") prefetch_depth = Param.Int( 0, - "Number of prefetches triggered at a time" "(0 turns off prefetching)", + "Number of prefetches triggered at a time(0 turns off prefetching)", ) prefetch_stride = Param.Int(1, "Fixed Prefetch Stride (1 means next-page)") prefetch_prev_type = Param.PrefetchType( @@ -225,24 +225,24 @@ class ComputeUnit(ClockedObject): functionalTLB = Param.Bool(False, 
"Assume TLB causes no delay") localMemBarrier = Param.Bool( - False, "Assume Barriers do not wait on " "kernel end" + False, "Assume Barriers do not wait on kernel end" ) countPages = Param.Bool( False, - "Generate per-CU file of all pages " "touched and how many times", + "Generate per-CU file of all pages touched and how many times", ) scalar_mem_queue_size = Param.Int( - 32, "Number of entries in scalar " "memory pipeline's queues" + 32, "Number of entries in scalar memory pipeline's queues" ) global_mem_queue_size = Param.Int( - 256, "Number of entries in the global " "memory pipeline's queues" + 256, "Number of entries in the global memory pipeline's queues" ) local_mem_queue_size = Param.Int( - 256, "Number of entries in the local " "memory pipeline's queues" + 256, "Number of entries in the local memory pipeline's queues" ) max_wave_requests = Param.Int( - 64, "number of pending vector memory " "requests per wavefront" + 64, "number of pending vector memory requests per wavefront" ) max_cu_tokens = Param.Int( 4, @@ -254,18 +254,18 @@ class ComputeUnit(ClockedObject): localDataStore = Param.LdsState("the LDS for this CU") vector_register_file = VectorParam.VectorRegisterFile( - "Vector register " "file" + "Vector register file" ) scalar_register_file = VectorParam.ScalarRegisterFile( - "Scalar register " "file" + "Scalar register file" ) out_of_order_data_delivery = Param.Bool( - False, "enable OoO data delivery" " in the GM pipeline" + False, "enable OoO data delivery in the GM pipeline" ) register_manager = Param.RegisterManager("Register Manager") fetch_depth = Param.Int( - 2, "number of i-cache lines that may be " "buffered in the fetch unit." + 2, "number of i-cache lines that may be buffered in the fetch unit." 
) diff --git a/src/gpu-compute/LdsState.py b/src/gpu-compute/LdsState.py index 637cf11264..c81859331c 100644 --- a/src/gpu-compute/LdsState.py +++ b/src/gpu-compute/LdsState.py @@ -41,7 +41,7 @@ class LdsState(ClockedObject): size = Param.Int(65536, "the size of the LDS") range = Param.AddrRange("64kB", "address space of the LDS") bankConflictPenalty = Param.Int( - 1, "penalty per LDS bank conflict when " "accessing data" + 1, "penalty per LDS bank conflict when accessing data" ) banks = Param.Int(32, "Number of LDS banks") cuPort = ResponsePort("port that goes to the compute unit") diff --git a/src/learning_gem5/part2/HelloObject.py b/src/learning_gem5/part2/HelloObject.py index 07ffd01c06..6b9aa8f811 100644 --- a/src/learning_gem5/part2/HelloObject.py +++ b/src/learning_gem5/part2/HelloObject.py @@ -36,7 +36,7 @@ class HelloObject(SimObject): time_to_wait = Param.Latency("Time before firing the event") number_of_fires = Param.Int( - 1, "Number of times to fire the event before " "goodbye" + 1, "Number of times to fire the event before goodbye" ) goodbye_object = Param.GoodbyeObject("A goodbye object") @@ -51,5 +51,5 @@ class GoodbyeObject(SimObject): "1kB", "Size of buffer to fill with goodbye" ) write_bandwidth = Param.MemoryBandwidth( - "100MB/s", "Bandwidth to fill " "the buffer" + "100MB/s", "Bandwidth to fill the buffer" ) diff --git a/src/mem/AddrMapper.py b/src/mem/AddrMapper.py index 1897236526..932fbf14e1 100644 --- a/src/mem/AddrMapper.py +++ b/src/mem/AddrMapper.py @@ -50,13 +50,13 @@ class AddrMapper(SimObject): # one port in each direction mem_side_port = RequestPort( - "This port sends requests and " "receives responses" + "This port sends requests and receives responses" ) master = DeprecatedParam( mem_side_port, "`master` is now called `mem_side_port`" ) cpu_side_port = ResponsePort( - "This port receives requests and " "sends responses" + "This port receives requests and sends responses" ) slave = DeprecatedParam( cpu_side_port, "`slave` is now 
called `cpu_side_port`" diff --git a/src/mem/Bridge.py b/src/mem/Bridge.py index a82f410d56..8131d62ef8 100644 --- a/src/mem/Bridge.py +++ b/src/mem/Bridge.py @@ -46,13 +46,13 @@ class Bridge(ClockedObject): cxx_class = "gem5::Bridge" mem_side_port = RequestPort( - "This port sends requests and " "receives responses" + "This port sends requests and receives responses" ) master = DeprecatedParam( mem_side_port, "`master` is now called `mem_side_port`" ) cpu_side_port = ResponsePort( - "This port receives requests and " "sends responses" + "This port receives requests and sends responses" ) slave = DeprecatedParam( cpu_side_port, "`slave` is now called `cpu_side_port`" diff --git a/src/mem/CommMonitor.py b/src/mem/CommMonitor.py index 288aeb5a07..ab946f1e91 100644 --- a/src/mem/CommMonitor.py +++ b/src/mem/CommMonitor.py @@ -49,13 +49,13 @@ class CommMonitor(SimObject): # one port in each direction mem_side_port = RequestPort( - "This port sends requests and " "receives responses" + "This port sends requests and receives responses" ) master = DeprecatedParam( mem_side_port, "`master` is now called `mem_side_port`" ) cpu_side_port = ResponsePort( - "This port receives requests and " "sends responses" + "This port receives requests and sends responses" ) slave = DeprecatedParam( cpu_side_port, "`slave` is now called `cpu_side_port`" @@ -70,10 +70,10 @@ class CommMonitor(SimObject): # histogram of burst length of packets (not using sample period) burst_length_bins = Param.Unsigned( - "20", "# bins in burst length " "histograms" + "20", "# bins in burst length histograms" ) disable_burst_length_hists = Param.Bool( - False, "Disable burst length " "histograms" + False, "Disable burst length histograms" ) # bandwidth per sample period @@ -95,18 +95,18 @@ class CommMonitor(SimObject): # outstanding requests (that did not yet get a response) per # sample period outstanding_bins = Param.Unsigned( - "20", "# bins in outstanding " "requests histograms" + "20", "# bins in 
outstanding requests histograms" ) disable_outstanding_hists = Param.Bool( - False, "Disable outstanding " "requests histograms" + False, "Disable outstanding requests histograms" ) # transactions (requests) observed per sample period transaction_bins = Param.Unsigned( - "20", "# bins in transaction " "count histograms" + "20", "# bins in transaction count histograms" ) disable_transaction_hists = Param.Bool( - False, "Disable transaction count " "histograms" + False, "Disable transaction count histograms" ) # address distributions (heatmaps) with associated address masks diff --git a/src/mem/DRAMInterface.py b/src/mem/DRAMInterface.py index 87bc11b94f..9c041e5838 100644 --- a/src/mem/DRAMInterface.py +++ b/src/mem/DRAMInterface.py @@ -57,7 +57,7 @@ class DRAMInterface(MemInterface): # enforce a limit on the number of accesses per row max_accesses_per_row = Param.Unsigned( - 16, "Max accesses per row before " "closing" + 16, "Max accesses per row before closing" ) # default to 0 bank groups per rank, indicating bank group architecture @@ -139,7 +139,7 @@ class DRAMInterface(MemInterface): # write-to-read, same rank turnaround penalty for same bank group tWTR_L = Param.Latency( Self.tWTR, - "Write to read, same rank switching " "time, same bank group", + "Write to read, same rank switching time, same bank group", ) # minimum precharge to precharge delay time diff --git a/src/mem/DRAMsim3.py b/src/mem/DRAMsim3.py index 0da9c1067b..40f61608d8 100644 --- a/src/mem/DRAMsim3.py +++ b/src/mem/DRAMsim3.py @@ -44,11 +44,11 @@ class DRAMsim3(AbstractMemory): # A single port for now port = ResponsePort( - "port for receiving requests from" "the CPU or other requestor" + "port for receiving requests fromthe CPU or other requestor" ) configFile = Param.String( - "ext/dramsim3/DRAMsim3/configs/" "DDR4_8Gb_x8_2400.ini", + "ext/dramsim3/DRAMsim3/configs/DDR4_8Gb_x8_2400.ini", "The configuration file to use with DRAMSim3", ) filePath = Param.String( diff --git 
a/src/mem/ExternalSlave.py b/src/mem/ExternalSlave.py index ce2d5b8b36..b7cd9b21f3 100644 --- a/src/mem/ExternalSlave.py +++ b/src/mem/ExternalSlave.py @@ -45,7 +45,7 @@ class ExternalSlave(SimObject): port = SlavePort("Slave port") addr_ranges = VectorParam.AddrRange( - [], "Addresses served by" " this port's external agent" + [], "Addresses served by this port's external agent" ) port_type = Param.String( diff --git a/src/mem/MemChecker.py b/src/mem/MemChecker.py index 9fc0d7c59c..fcee653265 100644 --- a/src/mem/MemChecker.py +++ b/src/mem/MemChecker.py @@ -51,16 +51,16 @@ class MemCheckerMonitor(SimObject): # one port in each direction mem_side_port = RequestPort( - "This port sends requests and receives " "responses" + "This port sends requests and receives responses" ) master = DeprecatedParam( - mem_side_port, "`master` is now called " "`mem_side_port`" + mem_side_port, "`master` is now called `mem_side_port`" ) cpu_side_port = ResponsePort( - "This port receives requests and sends " "responses" + "This port receives requests and sends responses" ) slave = DeprecatedParam( - cpu_side_port, "`slave` is now called " "`cpu_side_port`" + cpu_side_port, "`slave` is now called `cpu_side_port`" ) warn_only = Param.Bool(False, "Warn about violations only") memchecker = Param.MemChecker("Instance shared with other monitors") diff --git a/src/mem/MemCtrl.py b/src/mem/MemCtrl.py index c8acd22ed9..549616ccba 100644 --- a/src/mem/MemCtrl.py +++ b/src/mem/MemCtrl.py @@ -63,7 +63,7 @@ class MemCtrl(QoSMemCtrl): # Interface to memory media dram = Param.MemInterface( - "Memory interface, can be a DRAM" "or an NVM interface " + "Memory interface, can be a DRAMor an NVM interface " ) # read and write buffer depths are set in the interface @@ -79,12 +79,12 @@ class MemCtrl(QoSMemCtrl): # minimum write bursts to schedule before switching back to reads min_writes_per_switch = Param.Unsigned( - 16, "Minimum write bursts before " "switching to reads" + 16, "Minimum write bursts 
before switching to reads" ) # minimum read bursts to schedule before switching back to writes min_reads_per_switch = Param.Unsigned( - 16, "Minimum read bursts before " "switching to writes" + 16, "Minimum read bursts before switching to writes" ) # scheduler, address map and page policy diff --git a/src/mem/MemDelay.py b/src/mem/MemDelay.py index 430ffb77f5..eb4aaa7bf1 100644 --- a/src/mem/MemDelay.py +++ b/src/mem/MemDelay.py @@ -44,13 +44,13 @@ class MemDelay(ClockedObject): abstract = True mem_side_port = RequestPort( - "This port sends requests and " "receives responses" + "This port sends requests and receives responses" ) master = DeprecatedParam( mem_side_port, "`master` is now called `mem_side_port`" ) cpu_side_port = ResponsePort( - "This port receives requests and " "sends responses" + "This port receives requests and sends responses" ) slave = DeprecatedParam( cpu_side_port, "`slave` is now called `cpu_side_port`" diff --git a/src/mem/MemInterface.py b/src/mem/MemInterface.py index a32a3b5ec9..60bf99bf47 100644 --- a/src/mem/MemInterface.py +++ b/src/mem/MemInterface.py @@ -73,11 +73,11 @@ class MemInterface(AbstractMemory): device_size = Param.MemorySize("Size of memory device") # the physical organisation of the memory device_bus_width = Param.Unsigned( - "data bus width in bits for each " "memory device/chip" + "data bus width in bits for each memory device/chip" ) burst_length = Param.Unsigned("Burst lenght (BL) in beats") device_rowbuffer_size = Param.MemorySize( - "Page (row buffer) size per " "device/chip" + "Page (row buffer) size per device/chip" ) devices_per_rank = Param.Unsigned("Number of devices/chips per rank") ranks_per_channel = Param.Unsigned("Number of ranks per channel") @@ -96,7 +96,7 @@ class MemInterface(AbstractMemory): # Read/Write requests with data size larger than one full burst are broken # down into multiple requests in the controller tBURST = Param.Latency( - "Burst duration " "(typically burst length / 2 cycles)" + 
"Burst duration (typically burst length / 2 cycles)" ) # write-to-read, same rank turnaround penalty diff --git a/src/mem/PortTerminator.py b/src/mem/PortTerminator.py index 05fdd1177d..85d0af0771 100644 --- a/src/mem/PortTerminator.py +++ b/src/mem/PortTerminator.py @@ -35,8 +35,8 @@ class PortTerminator(SimObject): cxx_class = "gem5::PortTerminator" req_ports = VectorRequestPort( - "Vector port for connecting terminating " "response ports." + "Vector port for connecting terminating response ports." ) resp_ports = VectorResponsePort( - "Vector port for terminating " "request ports." + "Vector port for terminating request ports." ) diff --git a/src/mem/SerialLink.py b/src/mem/SerialLink.py index a40b714258..6b767050d6 100644 --- a/src/mem/SerialLink.py +++ b/src/mem/SerialLink.py @@ -50,13 +50,13 @@ class SerialLink(ClockedObject): cxx_class = "gem5::SerialLink" mem_side_port = RequestPort( - "This port sends requests and " "receives responses" + "This port sends requests and receives responses" ) master = DeprecatedParam( mem_side_port, "`master` is now called `mem_side_port`" ) cpu_side_port = ResponsePort( - "This port receives requests and " "sends responses" + "This port receives requests and sends responses" ) slave = DeprecatedParam( cpu_side_port, "`slave` is now called `cpu_side_port`" @@ -71,7 +71,7 @@ class SerialLink(ClockedObject): # link belongs to and the number of lanes: num_lanes = Param.Unsigned( 1, - "Number of parallel lanes inside the serial" "link. (aka. lane width)", + "Number of parallel lanes inside the seriallink. (aka. 
lane width)", ) link_speed = Param.UInt64( 1, diff --git a/src/mem/XBar.py b/src/mem/XBar.py index dbadccb861..d0becc22a8 100644 --- a/src/mem/XBar.py +++ b/src/mem/XBar.py @@ -51,13 +51,13 @@ class BaseXBar(ClockedObject): cxx_class = "gem5::BaseXBar" cpu_side_ports = VectorResponsePort( - "Vector port for connecting " "mem side ports" + "Vector port for connecting mem side ports" ) slave = DeprecatedParam( cpu_side_ports, "`slave` is now called `cpu_side_ports`" ) mem_side_ports = VectorRequestPort( - "Vector port for connecting " "cpu side ports" + "Vector port for connecting cpu side ports" ) master = DeprecatedParam( mem_side_ports, "`master` is now called `mem_side_ports`" @@ -98,7 +98,7 @@ class BaseXBar(ClockedObject): # a two-level hierarchical lookup. This is useful e.g. for the PCI # xbar configuration. use_default_range = Param.Bool( - False, "Perform address mapping for " "the default port" + False, "Perform address mapping for the default port" ) @@ -130,12 +130,12 @@ class CoherentXBar(BaseXBar): # already committed to responding, by establishing if the crossbar # is the point of coherency or not. point_of_coherency = Param.Bool( - False, "Consider this crossbar the " "point of coherency" + False, "Consider this crossbar the point of coherency" ) # Specify whether this crossbar is the point of unification. point_of_unification = Param.Bool( - False, "Consider this crossbar the " "point of unification" + False, "Consider this crossbar the point of unification" ) system = Param.System(Parent.any, "System that the crossbar belongs to.") diff --git a/src/mem/cache/Cache.py b/src/mem/cache/Cache.py index 1dfab1957f..49665dde91 100644 --- a/src/mem/cache/Cache.py +++ b/src/mem/cache/Cache.py @@ -61,10 +61,10 @@ class WriteAllocator(SimObject): # allow whole-line write coalescing, and eventually switches to a # write-no-allocate policy. 
coalesce_limit = Param.Unsigned( - 2, "Consecutive lines written before " "delaying for coalescing" + 2, "Consecutive lines written before delaying for coalescing" ) no_allocate_limit = Param.Unsigned( - 12, "Consecutive lines written before" " skipping allocation" + 12, "Consecutive lines written before skipping allocation" ) delay_threshold = Param.Unsigned( @@ -129,7 +129,7 @@ class BaseCache(ClockedObject): # co-allocatable with another existing entry of the same superblock, # so try move the block to co-allocate it move_contractions = Param.Bool( - True, "Try to co-allocate blocks that " "contract" + True, "Try to co-allocate blocks that contract" ) sequential_access = Param.Bool( diff --git a/src/mem/cache/compressors/Compressors.py b/src/mem/cache/compressors/Compressors.py index c8f82c55a1..eef5f77a18 100644 --- a/src/mem/cache/compressors/Compressors.py +++ b/src/mem/cache/compressors/Compressors.py @@ -232,7 +232,7 @@ class FrequentValuesCompressor(BaseCacheCompressor): ) check_saturation = Param.Bool( False, - "Whether the counters should be " "manipulated in case of saturation.", + "Whether the counters should be manipulated in case of saturation.", ) vft_assoc = Param.Int(16, "Associativity of the VFT.") diff --git a/src/mem/cache/tags/Tags.py b/src/mem/cache/tags/Tags.py index 4e7f632bfb..ade187fa39 100644 --- a/src/mem/cache/tags/Tags.py +++ b/src/mem/cache/tags/Tags.py @@ -139,7 +139,7 @@ class FALRU(BaseTags): cxx_class = "gem5::FALRU" min_tracked_cache_size = Param.MemorySize( - "128KiB", "Minimum cache size" " for which we track statistics" + "128KiB", "Minimum cache size for which we track statistics" ) # This tag uses its own embedded indexing diff --git a/src/mem/probes/StackDistProbe.py b/src/mem/probes/StackDistProbe.py index 2a9550bea5..5b44d9d333 100644 --- a/src/mem/probes/StackDistProbe.py +++ b/src/mem/probes/StackDistProbe.py @@ -44,7 +44,7 @@ class StackDistProbe(BaseMemProbe): cxx_class = "gem5::StackDistProbe" system = Param.System( 
- Parent.any, "System to use when determining system cache " "line size" + Parent.any, "System to use when determining system cache line size" ) line_size = Param.Unsigned( diff --git a/src/mem/ruby/slicc_interface/Controller.py b/src/mem/ruby/slicc_interface/Controller.py index 185812a044..42447f1cca 100644 --- a/src/mem/ruby/slicc_interface/Controller.py +++ b/src/mem/ruby/slicc_interface/Controller.py @@ -49,7 +49,7 @@ class RubyController(ClockedObject): version = Param.Int("") addr_ranges = VectorParam.AddrRange( - [AllMemory], "Address range this " "controller responds to" + [AllMemory], "Address range this controller responds to" ) cluster_id = Param.UInt32(0, "Id of this controller's cluster") diff --git a/src/mem/ruby/system/GPUCoalescer.py b/src/mem/ruby/system/GPUCoalescer.py index da459de133..fcf49e38b7 100644 --- a/src/mem/ruby/system/GPUCoalescer.py +++ b/src/mem/ruby/system/GPUCoalescer.py @@ -44,7 +44,7 @@ class RubyGPUCoalescer(RubyPort): 40 * 64, "max requests (incl. prefetches) outstanding" ) max_coalesces_per_cycle = Param.Int( - 1, "max instructions that can be " "coalesced in a single cycle" + 1, "max instructions that can be coalesced in a single cycle" ) icache = Param.RubyCache("") diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index 55ee527c41..b5af9ca8ed 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -184,7 +184,7 @@ class StateMachine(Symbol): if type_ident == "%s_TBE" % self.ident: if self.TBEType != None: self.error( - "Multiple Transaction Buffer types in a " "single machine." + "Multiple Transaction Buffer types in a single machine." 
) self.TBEType = type diff --git a/src/python/gem5/components/memory/dramsim_3.py b/src/python/gem5/components/memory/dramsim_3.py index b7eba919fc..e5c1877fb5 100644 --- a/src/python/gem5/components/memory/dramsim_3.py +++ b/src/python/gem5/components/memory/dramsim_3.py @@ -53,7 +53,7 @@ def config_ds3(mem_type: str, num_chnls: int) -> Tuple[str, str]: ) elif os.path.isfile(input_file): raise Exception( - "The configuration file '" + input_file + "' cannot " " be found." + "The configuration file '" + input_file + "' cannot be found." ) output_file = "/tmp/" + mem_type + "_chnls" + str(num_chnls) + ".ini" diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py index 91cbbc59bd..b5dfca9752 100644 --- a/src/python/m5/SimObject.py +++ b/src/python/m5/SimObject.py @@ -478,7 +478,7 @@ def cxxMethod(*args, **kwargs): args, varargs, keywords, defaults = inspect.getargspec(func) if varargs or keywords: raise ValueError( - "Wrapped methods must not contain variable " "arguments" + "Wrapped methods must not contain variable arguments" ) # Create tuples of (argument, default) @@ -571,7 +571,7 @@ class SimObjectCliWrapper(object): setattr(sim_object, key, val) else: raise SimObjectCliWrapperException( - "tried to set or unsettable" "object parameter: " + key + "tried to set or unsettableobject parameter: " + key ) else: raise SimObjectCliWrapperException( diff --git a/src/python/m5/params.py b/src/python/m5/params.py index 8e96f4668e..e76380bc40 100644 --- a/src/python/m5/params.py +++ b/src/python/m5/params.py @@ -1457,7 +1457,7 @@ class MetaEnum(MetaParamValue): cls.map[val] = idx else: raise TypeError( - "Enum-derived class must define " "attribute 'map' or 'vals'" + "Enum-derived class must define attribute 'map' or 'vals'" ) if cls.is_class: diff --git a/src/sim/System.py b/src/sim/System.py index a5722e80fd..eb1280f248 100644 --- a/src/sim/System.py +++ b/src/sim/System.py @@ -77,7 +77,7 @@ class System(SimObject): # mmap). 
By enabling this flag, we accommodate cases where a large # (but sparse) memory is simulated. mmap_using_noreserve = Param.Bool( - False, "mmap the backing store " "without reserving swap" + False, "mmap the backing store without reserving swap" ) # The memory ranges are to be populated when creating the system @@ -89,7 +89,7 @@ class System(SimObject): # The ranges backed by a shadowed ROM shadow_rom_ranges = VectorParam.AddrRange( - [], "Ranges backed by a " "shadowed ROM" + [], "Ranges backed by a shadowed ROM" ) shared_backstore = Param.String( diff --git a/src/systemc/tests/verify.py b/src/systemc/tests/verify.py index 5191062e50..0d8ce3cf75 100755 --- a/src/systemc/tests/verify.py +++ b/src/systemc/tests/verify.py @@ -148,7 +148,7 @@ class RunPhase(TestPhaseBase): "--timeout", type=int, metavar="SECONDS", - help="Time limit for each run in seconds, " "0 to disable.", + help="Time limit for each run in seconds, 0 to disable.", default=60, ) parser.add_argument( @@ -604,7 +604,7 @@ filter_opts = parser.add_mutually_exclusive_group() filter_opts.add_argument( "--filter", default="True", - help="Python expression which filters tests based " "on their properties", + help="Python expression which filters tests based on their properties", ) filter_opts.add_argument( "--filter-file", diff --git a/tests/configs/gpu-randomtest-ruby.py b/tests/configs/gpu-randomtest-ruby.py index ceede7f500..cfc65526e5 100644 --- a/tests/configs/gpu-randomtest-ruby.py +++ b/tests/configs/gpu-randomtest-ruby.py @@ -57,7 +57,7 @@ parser.add_argument( help="Number of GPU Command Processors (CP)", ) parser.add_argument( - "--simds-per-cu", type=int, default=4, help="SIMD units" "per CU" + "--simds-per-cu", type=int, default=4, help="SIMD unitsper CU" ) parser.add_argument( "--wf-size", type=int, default=64, help="Wavefront size(in workitems)" @@ -66,7 +66,7 @@ parser.add_argument( "--wfs-per-simd", type=int, default=10, - help="Number of " "WF slots per SIMD", + help="Number of WF slots per 
SIMD", ) # Add the ruby specific and protocol specific options diff --git a/tests/configs/gpu-ruby.py b/tests/configs/gpu-ruby.py index e45c446373..7606168a98 100644 --- a/tests/configs/gpu-ruby.py +++ b/tests/configs/gpu-ruby.py @@ -79,13 +79,13 @@ parser.add_argument( help="Number of GPU Command Processors (CP)", ) parser.add_argument( - "--simds-per-cu", type=int, default=4, help="SIMD units" "per CU" + "--simds-per-cu", type=int, default=4, help="SIMD unitsper CU" ) parser.add_argument( "--cu-per-sqc", type=int, default=4, - help="number of CUs" "sharing an SQC (icache, and thus icache TLB)", + help="number of CUssharing an SQC (icache, and thus icache TLB)", ) parser.add_argument( "--wf-size", type=int, default=64, help="Wavefront size(in workitems)" @@ -94,7 +94,7 @@ parser.add_argument( "--wfs-per-simd", type=int, default=8, - help="Number of " "WF slots per SIMD", + help="Number of WF slots per SIMD", ) parser.add_argument( "--sp-bypass-path-length", @@ -194,13 +194,11 @@ parser.add_argument( action="store_true", help="Count Page Accesses and output in per-CU output files", ) -parser.add_argument( - "--TLB-prefetch", type=int, help="prefetch depth for" "TLBs" -) +parser.add_argument("--TLB-prefetch", type=int, help="prefetch depth forTLBs") parser.add_argument( "--pf-type", type=str, - help="type of prefetch: " "PF_CU, PF_WF, PF_PHASE, PF_STRIDE", + help="type of prefetch: PF_CU, PF_WF, PF_PHASE, PF_STRIDE", ) parser.add_argument("--pf-stride", type=int, help="set prefetch stride") parser.add_argument( diff --git a/tests/gem5/fixture.py b/tests/gem5/fixture.py index 65b5454cae..c8bc79ff64 100644 --- a/tests/gem5/fixture.py +++ b/tests/gem5/fixture.py @@ -170,7 +170,7 @@ class SConsFixture(UniqueFixture): ) else: log.test_log.message( - "Building the following targets." " This may take a while." + "Building the following targets. This may take a while." 
) log.test_log.message("%s" % (", ".join(self.targets))) log.test_log.message( @@ -391,7 +391,7 @@ class DownloadedArchive(DownloadedProgram): except (urllib.error.URLError, socket.timeout): # Problem checking the server, use the old files. log.test_log.debug( - "Could not contact server. " "Binaries may be old." + "Could not contact server. Binaries may be old." ) return # If the server version is more recent, download it diff --git a/util/cpt_upgraders/isa-is-simobject.py b/util/cpt_upgraders/isa-is-simobject.py index 3f0132ce36..077d4d98bf 100644 --- a/util/cpt_upgraders/isa-is-simobject.py +++ b/util/cpt_upgraders/isa-is-simobject.py @@ -94,7 +94,7 @@ def upgrader(cpt): else: if cpt.items(sec): raise ValueError( - "Unexpected populated ISA section in old " "checkpoint" + "Unexpected populated ISA section in old checkpoint" ) for (key, value) in options: diff --git a/util/gerrit-bot/extract_gitcookies.py b/util/gerrit-bot/extract_gitcookies.py index 24f2ca0afa..ef17be10de 100755 --- a/util/gerrit-bot/extract_gitcookies.py +++ b/util/gerrit-bot/extract_gitcookies.py @@ -62,7 +62,7 @@ if __name__ == "__main__": ) parser.add_argument( "input", - help=("Path to a .gitcookies file or a file with " "a similar format"), + help=("Path to a .gitcookies file or a file with a similar format"), ) parser.add_argument("output", help="Path to the output file") args = parser.parse_args() diff --git a/util/maint/list_changes.py b/util/maint/list_changes.py index 9ada2b52f0..465ae1abb0 100755 --- a/util/maint/list_changes.py +++ b/util/maint/list_changes.py @@ -179,14 +179,14 @@ def _main(): "-u", type=str, default="origin/master", - help="Upstream branch for comparison. " "Default: %(default)s", + help="Upstream branch for comparison. Default: %(default)s", ) parser.add_argument( "--feature", "-f", type=str, default="HEAD", - help="Feature branch for comparison. " "Default: %(default)s", + help="Feature branch for comparison. 
Default: %(default)s", ) parser.add_argument( "--show-unknown", @@ -199,7 +199,7 @@ def _main(): parser.add_argument( "--deep-search", action="store_true", - help="Use a deep search to find incorrectly " "rebased changes", + help="Use a deep search to find incorrectly rebased changes", ) parser.add_argument( "paths", diff --git a/util/maint/show_changes_by_file.py b/util/maint/show_changes_by_file.py index be222620a0..ea739f78fe 100755 --- a/util/maint/show_changes_by_file.py +++ b/util/maint/show_changes_by_file.py @@ -95,14 +95,14 @@ def _main(): "-u", type=str, default="origin/master", - help="Upstream branch for comparison. " "Default: %(default)s", + help="Upstream branch for comparison. Default: %(default)s", ) parser.add_argument( "--feature", "-f", type=str, default="HEAD", - help="Feature branch for comparison. " "Default: %(default)s", + help="Feature branch for comparison. Default: %(default)s", ) parser.add_argument( "paths", diff --git a/util/plot_dram/dram_sweep_plot.py b/util/plot_dram/dram_sweep_plot.py index 8acb6ab681..ad7bc5e3b6 100755 --- a/util/plot_dram/dram_sweep_plot.py +++ b/util/plot_dram/dram_sweep_plot.py @@ -90,7 +90,7 @@ def main(): for line in simout: match = re.match( - "DRAM sweep with " "burst: (\d+), banks: (\d+), max stride: (\d+)", + "DRAM sweep with burst: (\d+), banks: (\d+), max stride: (\d+)", line, ) if match: From aeb617868f805dc058752d4ae3fd832a27941594 Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Mon, 14 Nov 2022 03:13:35 +0000 Subject: [PATCH 029/492] stdlib: Add MESI Three Level cache hierarchy Change-Id: Ibea6b71d62b71f7817f6860bbceed9e1915bb002 Signed-off-by: Hoa Nguyen Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65591 Reviewed-by: Jason Lowe-Power Tested-by: kokoro Maintainer: Jason Lowe-Power --- src/python/SConscript | 23 ++ .../abstract_three_level_cache_hierarchy.py | 52 ++++ .../ruby/caches/mesi_three_level/__init__.py | 0 .../ruby/caches/mesi_three_level/directory.py | 51 ++++ 
.../caches/mesi_three_level/dma_controller.py | 43 ++++ .../ruby/caches/mesi_three_level/l1_cache.py | 110 +++++++++ .../ruby/caches/mesi_three_level/l2_cache.py | 113 +++++++++ .../ruby/caches/mesi_three_level/l3_cache.py | 89 +++++++ .../ruby/mesi_three_level_cache_hierarchy.py | 225 ++++++++++++++++++ 9 files changed, 706 insertions(+) create mode 100644 src/python/gem5/components/cachehierarchies/abstract_three_level_cache_hierarchy.py create mode 100644 src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/__init__.py create mode 100644 src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py create mode 100644 src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py create mode 100644 src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py create mode 100644 src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py create mode 100644 src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py create mode 100644 src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py diff --git a/src/python/SConscript b/src/python/SConscript index e7e464e2df..aeeb8925a3 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -61,6 +61,8 @@ PySource('gem5.components.cachehierarchies', 'gem5/components/cachehierarchies/abstract_cache_hierarchy.py') PySource('gem5.components.cachehierarchies', 'gem5/components/cachehierarchies/abstract_two_level_cache_hierarchy.py') +PySource('gem5.components.cachehierarchies', + 'gem5/components/cachehierarchies/abstract_three_level_cache_hierarchy.py') PySource('gem5.components.cachehierarchies.chi', 'gem5/components/cachehierarchies/chi/__init__.py') PySource('gem5.components.cachehierarchies.chi', @@ -108,6 +110,9 @@ PySource('gem5.components.cachehierarchies.ruby', 'gem5/components/cachehierarchies/ruby/abstract_ruby_cache_hierarchy.py') 
PySource('gem5.components.cachehierarchies.ruby', 'gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py') +PySource('gem5.components.cachehierarchies.ruby', + 'gem5/components/cachehierarchies/ruby/' + 'mesi_three_level_cache_hierarchy.py') PySource('gem5.components.cachehierarchies.ruby', 'gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py') PySource('gem5.components.cachehierarchies.ruby.caches', @@ -131,6 +136,24 @@ PySource('gem5.components.cachehierarchies.ruby.caches.mesi_two_level', 'gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py') PySource('gem5.components.cachehierarchies.ruby.caches.mesi_two_level', 'gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l2_cache.py') +PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level', + 'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/' + '__init__.py') +PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level', + 'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/' + 'directory.py') +PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level', + 'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/' + 'dma_controller.py') +PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level', + 'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/' + 'l1_cache.py') +PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level', + 'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/' + 'l2_cache.py') +PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level', + 'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/' + 'l3_cache.py') PySource('gem5.components.cachehierarchies.ruby.caches.mi_example', 'gem5/components/cachehierarchies/ruby/caches/mi_example/__init__.py') PySource('gem5.components.cachehierarchies.ruby.caches.mi_example', diff --git 
a/src/python/gem5/components/cachehierarchies/abstract_three_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/abstract_three_level_cache_hierarchy.py new file mode 100644 index 0000000000..4d2f21abdc --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/abstract_three_level_cache_hierarchy.py @@ -0,0 +1,52 @@ +# Copyright (c) 2022 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +class AbstractThreeLevelCacheHierarchy: + """ + An abstract three-level hierarchy with configurable size and associativity + for each of L1, L2, and L3 caches. + """ + + def __init__( + self, + l1i_size: str, + l1i_assoc: int, + l1d_size: str, + l1d_assoc: int, + l2_size: str, + l2_assoc: int, + l3_size: str, + l3_assoc: int, + ): + self._l1i_size = l1i_size + self._l1i_assoc = l1i_assoc + self._l1d_size = l1d_size + self._l1d_assoc = l1d_assoc + self._l2_size = l2_size + self._l2_assoc = l2_assoc + self._l3_size = l3_size + self._l3_assoc = l3_assoc diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/__init__.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py new file mode 100644 index 0000000000..cd4f166fed --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py @@ -0,0 +1,51 @@ +# Copyright (c) 2021 The Regents of the University of California +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from ......utils.override import overrides +from ..abstract_directory import AbstractDirectory + +from m5.objects import MessageBuffer, RubyDirectoryMemory + + +class Directory(AbstractDirectory): + def __init__(self, network, cache_line_size, mem_range, port): + + super().__init__(network, cache_line_size) + self.addr_ranges = [mem_range] + self.directory = RubyDirectoryMemory() + # Connect this directory to the memory side. 
+ self.memory_out_port = port + + @overrides(AbstractDirectory) + def connectQueues(self, network): + self.requestToDir = MessageBuffer() + self.requestToDir.in_port = network.out_port + self.responseToDir = MessageBuffer() + self.responseToDir.in_port = network.out_port + self.responseFromDir = MessageBuffer() + self.responseFromDir.out_port = network.in_port + self.requestToMemory = MessageBuffer() + self.responseFromMemory = MessageBuffer() diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py new file mode 100644 index 0000000000..ab76d4cb5e --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py @@ -0,0 +1,43 @@ +# Copyright (c) 2021 The Regents of the University of California +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from ......utils.override import overrides +from ..abstract_dma_controller import AbstractDMAController + +from m5.objects import MessageBuffer + + +class DMAController(AbstractDMAController): + def __init__(self, network, cache_line_size): + super().__init__(network, cache_line_size) + + @overrides(AbstractDMAController) + def connectQueues(self, network): + self.mandatoryQueue = MessageBuffer() + self.responseFromDir = MessageBuffer(ordered=True) + self.responseFromDir.in_port = network.out_port + self.requestToDir = MessageBuffer() + self.requestToDir.out_port = network.in_port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py new file mode 100644 index 0000000000..2ce13d3b08 --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py @@ -0,0 +1,110 @@ +# Copyright (c) 2022 The Regents of the University of California +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from .....processors.abstract_core import AbstractCore +from ......isas import ISA +from ......utils.override import * + +from m5.objects import ( + MessageBuffer, + RubyPrefetcher, + RubyCache, + ClockDomain, + LRURP, + L0Cache_Controller, +) + +import math + +# L0Cache_Controller is the ruby backend's terminology corresponding to +# L1 cache in stdlib terms. 
+class L1Cache(L0Cache_Controller): + + _version = 0 + + @classmethod + def versionCount(cls): + cls._version += 1 + return cls._version - 1 + + def __init__( + self, + l1i_size, + l1i_assoc, + l1d_size, + l1d_assoc, + network, + core: AbstractCore, + cache_line_size, + target_isa: ISA, + clk_domain: ClockDomain, + ): + super().__init__() + + # This is the cache memory object that stores the cache data and tags + self.Icache = RubyCache( + size=l1i_size, + assoc=l1i_assoc, + start_index_bit=self.getBlockSizeBits(), + is_icache=True, + replacement_policy=LRURP(), + ) + self.Dcache = RubyCache( + size=l1d_size, + assoc=l1d_assoc, + start_index_bit=self.getBlockSizeBits(), + is_icache=False, + replacement_policy=LRURP(), + ) + self.clk_domain = clk_domain + self.prefetcher = RubyPrefetcher() + self.send_evictions = core.requires_send_evicts() + self.transitions_per_cycle = 32 + self.enable_prefetch = False + self.request_latency = 2 + self.response_latency = 2 + + self.version = self.versionCount() + self._cache_line_size = cache_line_size + self.connectQueues(network) + + def getBlockSizeBits(self): + bits = int(math.log(self._cache_line_size, 2)) + if 2**bits != self._cache_line_size.value: + raise Exception("Cache line size is not a power of 2!") + return bits + + def connectQueues(self, network): + self.prefetchQueue = MessageBuffer() + self.mandatoryQueue = MessageBuffer() + self.optionalQueue = MessageBuffer() + + # bufferToL1 and bufferFromL1 are ruby backend terminology. + # In stdlib terms, they are bufferToL2 and bufferFromL2 respectively. + # These buffers are connections between L1 cache and L2 cache. + # Later on, we'll need to connect those buffers to L2. 
+ self.bufferToL1 = MessageBuffer(ordered=True) + self.bufferFromL1 = MessageBuffer(ordered=True) diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py new file mode 100644 index 0000000000..e29f566191 --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py @@ -0,0 +1,113 @@ +# Copyright (c) 2022 The Regents of the University of California +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from .....processors.abstract_core import AbstractCore +from ......isas import ISA +from ......utils.override import * + +from m5.objects import ( + MessageBuffer, + RubyPrefetcher, + RubyCache, + ClockDomain, + L1Cache_Controller, +) + +import math + +# L1Cache_Controller is ruby backend's terminology corresponding to +# L2Cache in stdlib's terms +class L2Cache(L1Cache_Controller): + + _version = 0 + + @classmethod + def versionCount(cls): + cls._version += 1 + return cls._version - 1 + + def __init__( + self, + l2_size, + l2_assoc, + network, + core: AbstractCore, + num_l3Caches, + cache_line_size, + cluster_id, + target_isa: ISA, + clk_domain: ClockDomain, + ): + super().__init__() + + # This is the cache memory object that stores the cache data and tags + self.cache = RubyCache( + size=l2_size, + assoc=l2_assoc, + start_index_bit=self.getBlockSizeBits(), + is_icache=False, + ) + # l2_select_num_bits is ruby backend terminology. + # In stdlib terms, it is number of bits for selecting L3 cache. + self.l2_select_num_bits = int(math.log(num_l3Caches, 2)) + self.cluster_id = cluster_id + self.clk_domain = clk_domain + self.prefetcher = RubyPrefetcher() + self.transitions_per_cycle = 32 + # l1_request_latency, l1_response_latency, to_l2_latency are + # ruby backend terminology. + # In stdlib terms, they are L2 cache request latency, L2 response + # latency, and to L3 cache latency respectively. 
+ self.l1_request_latency = 2 + self.l1_response_latency = 2 + self.to_l2_latency = 1 + + self.version = self.versionCount() + self._cache_line_size = cache_line_size + self.connectQueues(network) + + def connectQueues(self, network): + self.mandatoryQueue = MessageBuffer() + self.optionalQueue = MessageBuffer() + + # In the below terms, L2 are ruby backend terminology. + # They are L3 in stdlib. + + # Request from/to L2 buffers + self.requestFromL2 = MessageBuffer() + self.requestFromL2.in_port = network.out_port + self.requestToL2 = MessageBuffer() + self.requestToL2.out_port = network.in_port + + # Response from/to L2 buffers + self.responseFromL2 = MessageBuffer() + self.responseFromL2.in_port = network.out_port + self.responseToL2 = MessageBuffer() + self.responseToL2.out_port = network.in_port + + # Unblock to L2 buffer + self.unblockToL2 = MessageBuffer() + self.unblockToL2.out_port = network.in_port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py new file mode 100644 index 0000000000..6d46d1fdf0 --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py @@ -0,0 +1,89 @@ +# Copyright (c) 2022 The Regents of the University of California +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.objects import MessageBuffer, RubyCache, L2Cache_Controller + +import math + +# L2Cache_Controller is ruby backend's terminology corresponding to +# L3 cache in stdlib. 
+class L3Cache(L2Cache_Controller): + + _version = 0 + + @classmethod + def versionCount(cls): + cls._version += 1 + return cls._version - 1 + + def __init__( + self, + l3_size, + l3_assoc, + network, + num_l3Caches, + cache_line_size, + cluster_id, + ): + super().__init__() + + # This is the cache memory object that stores the cache data and tags + self.L2cache = RubyCache( + size=l3_size, + assoc=l3_assoc, + start_index_bit=self.getIndexBit(num_l3Caches), + ) + + self.transitions_per_cycle = 4 + self.cluster_id = cluster_id + self.l2_request_latency = 2 + self.l2_response_latency = 2 + self.to_l1_latency = 1 + + self.version = self.versionCount() + self._cache_line_size = cache_line_size + self.connectQueues(network) + + def getIndexBit(self, num_l3caches): + l3_bits = int(math.log(num_l3caches, 2)) + bits = int(math.log(self._cache_line_size, 2)) + l3_bits + return bits + + def connectQueues(self, network): + # In the below terms, L1 and L2 are ruby backend terminology. + # In stdlib, they are L2 and L3 caches respectively. 
+ self.DirRequestFromL2Cache = MessageBuffer() + self.DirRequestFromL2Cache.out_port = network.in_port + self.L1RequestFromL2Cache = MessageBuffer() + self.L1RequestFromL2Cache.out_port = network.in_port + self.responseFromL2Cache = MessageBuffer() + self.responseFromL2Cache.out_port = network.in_port + self.unblockToL2Cache = MessageBuffer() + self.unblockToL2Cache.in_port = network.out_port + self.L1RequestToL2Cache = MessageBuffer() + self.L1RequestToL2Cache.in_port = network.out_port + self.responseToL2Cache = MessageBuffer() + self.responseToL2Cache.in_port = network.out_port diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py new file mode 100644 index 0000000000..89b6b21177 --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py @@ -0,0 +1,225 @@ +# Copyright (c) 2022 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +from .abstract_ruby_cache_hierarchy import AbstractRubyCacheHierarchy +from ..abstract_three_level_cache_hierarchy import ( + AbstractThreeLevelCacheHierarchy, +) +from ....coherence_protocol import CoherenceProtocol +from ....isas import ISA +from ...boards.abstract_board import AbstractBoard +from ....utils.requires import requires + +from .topologies.simple_pt2pt import SimplePt2Pt +from .caches.mesi_three_level.l1_cache import L1Cache +from .caches.mesi_three_level.l2_cache import L2Cache +from .caches.mesi_three_level.l3_cache import L3Cache +from .caches.mesi_three_level.directory import Directory +from .caches.mesi_three_level.dma_controller import DMAController + +from m5.objects import RubySystem, RubySequencer, DMASequencer, RubyPortProxy + + +class MESIThreeLevelCacheHierarchy( + AbstractRubyCacheHierarchy, AbstractThreeLevelCacheHierarchy +): + """A three-level private-L1-private-L2-shared-L3 MESI hierarchy. + + The on-chip network is a point-to-point all-to-all simple network. 
+ """ + + def __init__( + self, + l1i_size: str, + l1i_assoc: str, + l1d_size: str, + l1d_assoc: str, + l2_size: str, + l2_assoc: str, + l3_size: str, + l3_assoc: str, + num_l3_banks: int, + ): + AbstractRubyCacheHierarchy.__init__(self=self) + AbstractThreeLevelCacheHierarchy.__init__( + self, + l1i_size=l1i_size, + l1i_assoc=l1i_assoc, + l1d_size=l1d_size, + l1d_assoc=l1d_assoc, + l2_size=l2_size, + l2_assoc=l2_assoc, + l3_size=l3_size, + l3_assoc=l3_assoc, + ) + + self._num_l3_banks = num_l3_banks + + def incorporate_cache(self, board: AbstractBoard) -> None: + + requires( + coherence_protocol_required=CoherenceProtocol.MESI_THREE_LEVEL + ) + + cache_line_size = board.get_cache_line_size() + + self.ruby_system = RubySystem() + + # MESI_Three_Level needs 3 virtual networks + self.ruby_system.number_of_virtual_networks = 3 + + self.ruby_system.network = SimplePt2Pt(self.ruby_system) + self.ruby_system.network.number_of_virtual_networks = 3 + + self._l1_controllers = [] + self._l2_controllers = [] + self._l3_controllers = [] + cores = board.get_processor().get_cores() + for core_idx, core in enumerate(cores): + l1_cache = L1Cache( + l1i_size=self._l1i_size, + l1i_assoc=self._l1i_assoc, + l1d_size=self._l1d_size, + l1d_assoc=self._l1d_assoc, + network=self.ruby_system.network, + core=core, + cache_line_size=cache_line_size, + target_isa=board.processor.get_isa(), + clk_domain=board.get_clock_domain(), + ) + + l1_cache.sequencer = RubySequencer( + version=core_idx, + dcache=l1_cache.Dcache, + clk_domain=l1_cache.clk_domain, + ) + + if board.has_io_bus(): + l1_cache.sequencer.connectIOPorts(board.get_io_bus()) + + l1_cache.ruby_system = self.ruby_system + + core.connect_icache(l1_cache.sequencer.in_ports) + core.connect_dcache(l1_cache.sequencer.in_ports) + + core.connect_walker_ports( + l1_cache.sequencer.in_ports, l1_cache.sequencer.in_ports + ) + + # Connect the interrupt ports + if board.get_processor().get_isa() == ISA.X86: + int_req_port = 
l1_cache.sequencer.interrupt_out_port + int_resp_port = l1_cache.sequencer.in_ports + core.connect_interrupt(int_req_port, int_resp_port) + else: + core.connect_interrupt() + + self._l1_controllers.append(l1_cache) + + # For testing purpose, we use point-to-point topology. So, the + # assigned cluster ID is ignored by ruby. + # Thus, we set cluster_id to 0. + l2_cache = L2Cache( + l2_size=self._l2_size, + l2_assoc=self._l2_assoc, + network=self.ruby_system.network, + core=core, + num_l3Caches=self._num_l3_banks, + cache_line_size=cache_line_size, + cluster_id=0, + target_isa=board.processor.get_isa(), + clk_domain=board.get_clock_domain(), + ) + + l2_cache.ruby_system = self.ruby_system + # L0Cache in the ruby backend is l1 cache in stdlib + # L1Cache in the ruby backend is l2 cache in stdlib + l2_cache.bufferFromL0 = l1_cache.bufferToL1 + l2_cache.bufferToL0 = l1_cache.bufferFromL1 + + self._l2_controllers.append(l2_cache) + + for _ in range(self._num_l3_banks): + l3_cache = L3Cache( + l3_size=self._l3_size, + l3_assoc=self._l3_assoc, + network=self.ruby_system.network, + num_l3Caches=self._num_l3_banks, + cache_line_size=cache_line_size, + cluster_id=0, # cluster_id is ignored in point-to-point topology + ) + l3_cache.ruby_system = self.ruby_system + self._l3_controllers.append(l3_cache) + + # TODO: Make this prettier: The problem is not being able to proxy + # the ruby system correctly + for cache in self._l3_controllers: + cache.ruby_system = self.ruby_system + + self._directory_controllers = [ + Directory(self.ruby_system.network, cache_line_size, range, port) + for range, port in board.get_mem_ports() + ] + # TODO: Make this prettier: The problem is not being able to proxy + # the ruby system correctly + for dir in self._directory_controllers: + dir.ruby_system = self.ruby_system + + self._dma_controllers = [] + if board.has_dma_ports(): + dma_ports = board.get_dma_ports() + for i, port in enumerate(dma_ports): + ctrl = DMAController(self.ruby_system.network, 
cache_line_size) + ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port) + self._dma_controllers.append(ctrl) + ctrl.ruby_system = self.ruby_system + + self.ruby_system.num_of_sequencers = len(self._l1_controllers) + len( + self._dma_controllers + ) + self.ruby_system.l1_controllers = self._l1_controllers + self.ruby_system.l2_controllers = self._l2_controllers + self.ruby_system.l3_controllers = self._l3_controllers + self.ruby_system.directory_controllers = self._directory_controllers + + if len(self._dma_controllers) != 0: + self.ruby_system.dma_controllers = self._dma_controllers + + # Create the network and connect the controllers. + self.ruby_system.network.connectControllers( + self._l1_controllers + + self._l2_controllers + + self._l3_controllers + + self._directory_controllers + + self._dma_controllers + ) + self.ruby_system.network.setup_buffers() + + # Set up a proxy port for the system_port. Used for load binaries and + # other functional-only things. + self.ruby_system.sys_port_proxy = RubyPortProxy() + board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) From d89d77f1c29a80d75e26c2683265e8891f5ac33e Mon Sep 17 00:00:00 2001 From: Yu-hsin Wang Date: Wed, 12 Oct 2022 11:20:44 +0800 Subject: [PATCH 030/492] fastmodel: correct the Iris namespace for FastModel 11.19 Change-Id: I3f899699ce27ffdc5bbed311fec9f38c62027a80 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66071 Reviewed-by: Earl Ou Tested-by: kokoro Maintainer: Bobby Bruce --- src/arch/arm/fastmodel/iris/thread_context.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arch/arm/fastmodel/iris/thread_context.cc b/src/arch/arm/fastmodel/iris/thread_context.cc index b12536dbdd..45e020d832 100644 --- a/src/arch/arm/fastmodel/iris/thread_context.cc +++ b/src/arch/arm/fastmodel/iris/thread_context.cc @@ -441,9 +441,9 @@ void ThreadContext::readMem( iris::MemorySpaceId space, Addr addr, void *p, size_t size) { - 
iris::r0master::MemoryReadResult r; + iris::MemoryReadResult r; auto err = call().memory_read(_instId, r, space, addr, 1, size); - panic_if(err != iris::r0master::E_ok, "readMem failed."); + panic_if(err != iris::E_ok, "readMem failed."); std::memcpy(p, r.data.data(), size); } @@ -455,7 +455,7 @@ ThreadContext::writeMem( std::memcpy(data.data(), p, size); iris::MemoryWriteResult r; auto err = call().memory_write(_instId, r, space, addr, 1, size, data); - panic_if(err != iris::r0master::E_ok, "writeMem failed."); + panic_if(err != iris::E_ok, "writeMem failed."); } bool From c0d67cba3a4c2a3ad30b0c4e6c098bd20b56a91f Mon Sep 17 00:00:00 2001 From: Yu-hsin Wang Date: Wed, 23 Nov 2022 11:00:23 +0800 Subject: [PATCH 031/492] systemc: fix extension not found TlmToGem5 bridge response path The gem5 packet has two ways to be associated with the TLM payload. If the request is initiated from gem5, they are associated by a TLM extension. If the request is initiated from systemc, they are associated by SenderState. The current implementation apparently only took care of requests initiated from gem5. We need to update the logic to take care of both. This change moves the response sync out of sendBeginResp and performs it before calling that function. 
Change-Id: If415fbe33249b75e549086d9ca36eda3c20f7ec2 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66053 Reviewed-by: Earl Ou Maintainer: Bobby Bruce Tested-by: kokoro --- src/systemc/tlm_bridge/tlm_to_gem5.cc | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/systemc/tlm_bridge/tlm_to_gem5.cc b/src/systemc/tlm_bridge/tlm_to_gem5.cc index 468ea83f37..47a2fba08e 100644 --- a/src/systemc/tlm_bridge/tlm_to_gem5.cc +++ b/src/systemc/tlm_bridge/tlm_to_gem5.cc @@ -207,14 +207,6 @@ void TlmToGem5Bridge::sendBeginResp(tlm::tlm_generic_payload &trans, sc_core::sc_time &delay) { - Gem5SystemC::Gem5Extension *extension = nullptr; - trans.get_extension(extension); - panic_if(extension == nullptr, - "Missing gem5 extension when sending BEGIN_RESP"); - auto pkt = extension->getPacket(); - - setPayloadResponse(trans, pkt); - tlm::tlm_phase phase = tlm::BEGIN_RESP; auto status = socket->nb_transport_bw(trans, phase, delay); @@ -252,6 +244,7 @@ TlmToGem5Bridge::handleBeginReq(tlm::tlm_generic_payload &trans) sendEndReq(trans); if (!needsResponse) { auto delay = sc_core::SC_ZERO_TIME; + setPayloadResponse(trans, pkt); sendBeginResp(trans, delay); } trans.release(); @@ -481,6 +474,8 @@ TlmToGem5Bridge::recvTimingResp(PacketPtr pkt) sc_assert(tlmSenderState != nullptr); auto &trans = tlmSenderState->trans; + setPayloadResponse(trans, pkt); + sendBeginResp(trans, delay); Gem5SystemC::Gem5Extension *extension = nullptr; trans.get_extension(extension); @@ -493,7 +488,6 @@ TlmToGem5Bridge::recvTimingResp(PacketPtr pkt) if (extension == nullptr) destroyPacket(pkt); - sendBeginResp(trans, delay); trans.release(); return true; @@ -512,12 +506,12 @@ TlmToGem5Bridge::recvReqRetry() bool needsResponse = pendingPacket->needsResponse(); if (bmp.sendTimingReq(pendingPacket)) { waitForRetry = false; - pendingPacket = nullptr; auto &trans = *pendingRequest; sendEndReq(trans); if (!needsResponse) { auto delay = sc_core::SC_ZERO_TIME; + 
setPayloadResponse(trans, pendingPacket); sendBeginResp(trans, delay); } trans.release(); From eee42275eeea15e4814b1f9df6709b3f69e87b22 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Fri, 18 Nov 2022 16:47:50 -0800 Subject: [PATCH 032/492] dev-amdgpu: Writeback RLC queue MQD when unmapped Currently when RLC queues (user mode queues) are mapped, the read/write pointers of the ring buffer are set to zero. However, these queues could be unmapped and then remapped later. In that situation the read/write pointers should keep the values they held before the unmapping occurred. Since the read pointer gets reset to zero, the queue begins reading from the start of the ring, which usually contains older packets. There is a 99% chance those packets contain addresses which are no longer in the page tables, which will cause a page fault. To fix this we update the MQD with the current read/write pointer values and then write back the MQD to memory when the queue is unmapped. This requires adding a pointer to the MQD and the host address of the MQD where it should be written back to. The interface for registering an RLC queue is also simplified. Since we need to pass the MQD anyway, we can get values from it as well. Fixes b+tree and streamcluster from rodinia (when using RLC queues). 
Change-Id: Ie5dad4d7d90ea240c3e9f0cddf3e844a3cd34c4f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65791 Tested-by: kokoro Maintainer: Matt Sinclair Reviewed-by: Matt Sinclair --- src/dev/amdgpu/pm4_packet_processor.cc | 4 +- src/dev/amdgpu/pm4_queues.hh | 24 +++++++++-- src/dev/amdgpu/sdma_engine.cc | 58 ++++++++++++++++++++++---- src/dev/amdgpu/sdma_engine.hh | 12 ++++-- 4 files changed, 79 insertions(+), 19 deletions(-) diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc index f78f8333a6..152fd4da73 100644 --- a/src/dev/amdgpu/pm4_packet_processor.cc +++ b/src/dev/amdgpu/pm4_packet_processor.cc @@ -458,9 +458,7 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2); // Register RLC queue with SDMA - sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, - mqd->rb_base << 8, rlc_size, - rptr_wb_addr); + sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, addr, mqd); // Register doorbell with GPU device gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng); diff --git a/src/dev/amdgpu/pm4_queues.hh b/src/dev/amdgpu/pm4_queues.hh index 8b6626d176..ddadd6543b 100644 --- a/src/dev/amdgpu/pm4_queues.hh +++ b/src/dev/amdgpu/pm4_queues.hh @@ -33,6 +33,8 @@ #ifndef __DEV_AMDGPU_PM4_QUEUES_HH__ #define __DEV_AMDGPU_PM4_QUEUES_HH__ +#include "dev/amdgpu/pm4_defines.hh" + namespace gem5 { @@ -201,10 +203,24 @@ typedef struct GEM5_PACKED }; uint64_t rb_base; }; - uint32_t sdmax_rlcx_rb_rptr; - uint32_t sdmax_rlcx_rb_rptr_hi; - uint32_t sdmax_rlcx_rb_wptr; - uint32_t sdmax_rlcx_rb_wptr_hi; + union + { + struct + { + uint32_t sdmax_rlcx_rb_rptr; + uint32_t sdmax_rlcx_rb_rptr_hi; + }; + uint64_t rptr; + }; + union + { + struct + { + uint32_t sdmax_rlcx_rb_wptr; + uint32_t sdmax_rlcx_rb_wptr_hi; + }; + uint64_t wptr; + }; uint32_t sdmax_rlcx_rb_wptr_poll_cntl; uint32_t sdmax_rlcx_rb_rptr_addr_hi; uint32_t 
sdmax_rlcx_rb_rptr_addr_lo; diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc index 02203c8178..4c03bf57b2 100644 --- a/src/dev/amdgpu/sdma_engine.cc +++ b/src/dev/amdgpu/sdma_engine.cc @@ -165,30 +165,40 @@ SDMAEngine::translate(Addr vaddr, Addr size) } void -SDMAEngine::registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size, - Addr rptr_wb_addr) +SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd) { + uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1); + Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi; + rptr_wb_addr <<= 32; + rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo; + // Get first free RLC if (!rlc0.valid()) { DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC0\n", doorbell); rlcInfo[0] = doorbell; rlc0.valid(true); - rlc0.base(rb_base); + rlc0.base(mqd->rb_base << 8); + rlc0.size(rlc_size); rlc0.rptr(0); - rlc0.wptr(0); + rlc0.incRptr(mqd->rptr); + rlc0.setWptr(mqd->wptr); rlc0.rptrWbAddr(rptr_wb_addr); rlc0.processing(false); - rlc0.size(size); + rlc0.setMQD(mqd); + rlc0.setMQDAddr(mqdAddr); } else if (!rlc1.valid()) { DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell); rlcInfo[1] = doorbell; rlc1.valid(true); - rlc1.base(rb_base); + rlc1.base(mqd->rb_base << 8); + rlc1.size(rlc_size); rlc1.rptr(0); - rlc1.wptr(0); + rlc1.incRptr(mqd->rptr); + rlc1.setWptr(mqd->wptr); rlc1.rptrWbAddr(rptr_wb_addr); rlc1.processing(false); - rlc1.size(size); + rlc1.setMQD(mqd); + rlc1.setMQDAddr(mqdAddr); } else { panic("No free RLCs. 
Check they are properly unmapped."); } @@ -199,9 +209,37 @@ SDMAEngine::unregisterRLCQueue(Addr doorbell) { DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell); if (rlcInfo[0] == doorbell) { + SDMAQueueDesc *mqd = rlc0.getMQD(); + if (mqd) { + DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n", + rlc0.getMQDAddr()); + + mqd->rptr = rlc0.globalRptr(); + mqd->wptr = rlc0.getWptr(); + + auto cb = new DmaVirtCallback( + [ = ] (const uint32_t &) { }); + dmaWriteVirt(rlc0.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd); + } else { + warn("RLC0 SDMAMQD address invalid\n"); + } rlc0.valid(false); rlcInfo[0] = 0; } else if (rlcInfo[1] == doorbell) { + SDMAQueueDesc *mqd = rlc1.getMQD(); + if (mqd) { + DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n", + rlc1.getMQDAddr()); + + mqd->rptr = rlc1.globalRptr(); + mqd->wptr = rlc1.getWptr(); + + auto cb = new DmaVirtCallback( + [ = ] (const uint32_t &) { }); + dmaWriteVirt(rlc1.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd); + } else { + warn("RLC1 SDMAMQD address invalid\n"); + } rlc1.valid(false); rlcInfo[1] = 0; } else { @@ -213,7 +251,9 @@ void SDMAEngine::deallocateRLCQueues() { for (auto doorbell: rlcInfo) { - unregisterRLCQueue(doorbell); + if (doorbell) { + unregisterRLCQueue(doorbell); + } } } diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh index 0bfee126c9..27c169193b 100644 --- a/src/dev/amdgpu/sdma_engine.hh +++ b/src/dev/amdgpu/sdma_engine.hh @@ -34,6 +34,7 @@ #include "base/bitunion.hh" #include "dev/amdgpu/amdgpu_device.hh" +#include "dev/amdgpu/pm4_queues.hh" #include "dev/amdgpu/sdma_packets.hh" #include "dev/dma_virt_device.hh" #include "params/SDMAEngine.hh" @@ -65,9 +66,11 @@ class SDMAEngine : public DmaVirtDevice SDMAQueue *_parent; SDMAQueue *_ib; SDMAType _type; + SDMAQueueDesc *_mqd; + Addr _mqd_addr = 0; public: SDMAQueue() : _rptr(0), _wptr(0), _valid(false), _processing(false), - _parent(nullptr), _ib(nullptr), _type(SDMAGfx) {} + _parent(nullptr), 
_ib(nullptr), _type(SDMAGfx), _mqd(nullptr) {} Addr base() { return _base; } Addr rptr() { return _base + _rptr; } @@ -82,6 +85,8 @@ class SDMAEngine : public DmaVirtDevice SDMAQueue* parent() { return _parent; } SDMAQueue* ib() { return _ib; } SDMAType queueType() { return _type; } + SDMAQueueDesc* getMQD() { return _mqd; } + Addr getMQDAddr() { return _mqd_addr; } void base(Addr value) { _base = value; } @@ -114,6 +119,8 @@ class SDMAEngine : public DmaVirtDevice void parent(SDMAQueue* q) { _parent = q; } void ib(SDMAQueue* ib) { _ib = ib; } void queueType(SDMAType type) { _type = type; } + void setMQD(SDMAQueueDesc *mqd) { _mqd = mqd; } + void setMQDAddr(Addr mqdAddr) { _mqd_addr = mqdAddr; } }; /* SDMA Engine ID */ @@ -280,8 +287,7 @@ class SDMAEngine : public DmaVirtDevice /** * Methods for RLC queues */ - void registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size, - Addr rptr_wb_addr); + void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd); void unregisterRLCQueue(Addr doorbell); void deallocateRLCQueues(); From 8479a691aa57c8f5763b71d26d579697ae96d007 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Wed, 30 Nov 2022 15:02:05 -0800 Subject: [PATCH 033/492] stdlib,python: Allow setting of to tick exits via m5 This commit adds the following functions to the `m5` python module: - setMaxTick(tick) -> None - getMaxTick() -> int - getTicksUntilMax() -> int - scheduleTickExitFromCurrent(tick, exit_string) -> None - scheduleTickExitAbsolute(tick, exit_string) -> None Until this patch the only way to set an exit at a particular tick was via `simulate.run` which would reschedule the maximum tick. This functionality has been explicitly exposed via the new `setMaxTick` function. However, as this only reschedules the single maximum tick, it cannot be used to schedule exits at multiple different ticks. To get around this problem the `scheduleTickExit` functions have been added. These allow a user to schedule multiple exit events.
The functions contain an `exit_string` parameter that provides the string the simulator is to return when the specified tick is met. By default this string is "Tick exit reached" which is used by the stdlib Simulator module to declare a new `SCHEDULED_TICK` exit event (Note: this has been deliberately kept separate from the `MAX_TICK` exit event. This commit serves as an attempt to decouple these two concepts). Tests are provided in this patch to ensure these new functions work as intended. Additional notes: - The `simulate` function has been fixed to match the documentation. If the `num_cycles` is -1 then the maximum tick is set to `MaxTick`. Otherwise the maximum tick is set to `curTick() + num_cycles`. The functionality of this function will remain unchanged to the end-user. - Full integration into the Simulator module is not complete as of this patch. Users must use the m5 python module to set these exit events. Change-Id: I6c92b31dd409dc866152224600ea8166cfcba38b Issue-on: https://gem5.atlassian.net/browse/GEM5-1131 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66231 Reviewed-by: Jason Lowe-Power Tested-by: kokoro Maintainer: Jason Lowe-Power --- src/python/gem5/simulate/exit_event.py | 3 + src/python/gem5/simulate/simulator.py | 2 + src/python/m5/simulate.py | 59 +++++- src/python/pybind11/event.cc | 4 + src/sim/simulate.cc | 53 +++++- src/sim/simulate.hh | 32 +++- tests/gem5/to_tick/configs/tick-exit.py | 100 ++++++++++ tests/gem5/to_tick/configs/tick-to-max.py | 123 +++++++++++++ .../to_tick/ref/tick-exit-10-20-30-40.txt | 6 + tests/gem5/to_tick/ref/tick-exit-100.txt | 3 + .../ref/tick-to-max-at-execution-100.txt | 4 + ...-to-max-at-execution-and-after-100-200.txt | 4 + .../ref/tick-to-max-before-execution-250.txt | 4 + tests/gem5/to_tick/test_to_tick.py | 174 ++++++++++++++++++ 14 files changed, 561 insertions(+), 10 deletions(-) create mode 100644
tests/gem5/to_tick/configs/tick-to-max.py create mode 100644 tests/gem5/to_tick/ref/tick-exit-10-20-30-40.txt create mode 100644 tests/gem5/to_tick/ref/tick-exit-100.txt create mode 100644 tests/gem5/to_tick/ref/tick-to-max-at-execution-100.txt create mode 100644 tests/gem5/to_tick/ref/tick-to-max-at-execution-and-after-100-200.txt create mode 100644 tests/gem5/to_tick/ref/tick-to-max-before-execution-250.txt create mode 100644 tests/gem5/to_tick/test_to_tick.py diff --git a/src/python/gem5/simulate/exit_event.py b/src/python/gem5/simulate/exit_event.py index 089017806b..1e14fdd11a 100644 --- a/src/python/gem5/simulate/exit_event.py +++ b/src/python/gem5/simulate/exit_event.py @@ -42,6 +42,7 @@ class ExitEvent(Enum): SWITCHCPU = "switchcpu" # An exit needed to switch CPU cores. FAIL = "fail" # An exit because the simulation has failed. CHECKPOINT = "checkpoint" # An exit to load a checkpoint. + SCHEDULED_TICK = "scheduled tick exit" MAX_TICK = "max tick" # An exit due to a maximum tick value being met. 
USER_INTERRUPT = ( # An exit due to a user interrupt (e.g., cntr + c) "user interupt" @@ -75,6 +76,8 @@ class ExitEvent(Enum): return ExitEvent.EXIT elif exit_string == "simulate() limit reached": return ExitEvent.MAX_TICK + elif exit_string == "Tick exit reached": + return ExitEvent.SCHEDULED_TICK elif exit_string == "switchcpu": return ExitEvent.SWITCHCPU elif exit_string == "m5_fail instruction encountered": diff --git a/src/python/gem5/simulate/simulator.py b/src/python/gem5/simulate/simulator.py index 1d0d3ecc66..e27679a996 100644 --- a/src/python/gem5/simulate/simulator.py +++ b/src/python/gem5/simulate/simulator.py @@ -157,6 +157,7 @@ class Simulator: * ExitEvent.WORKEND: exit simulation * ExitEvent.USER_INTERRUPT: exit simulation * ExitEvent.MAX_TICK: exit simulation + * ExitEvent.SCHEDULED_TICK: exit simulation * ExitEvent.SIMPOINT_BEGIN: reset stats * ExitEvent.MAX_INSTS: exit simulation @@ -197,6 +198,7 @@ class Simulator: )(), ExitEvent.USER_INTERRUPT: exit_generator(), ExitEvent.MAX_TICK: exit_generator(), + ExitEvent.SCHEDULED_TICK: exit_generator(), ExitEvent.SIMPOINT_BEGIN: warn_default_decorator( reset_stats_generator, "simpoint begin", diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py index a47d4cacd6..744d95f9f6 100644 --- a/src/python/m5/simulate.py +++ b/src/python/m5/simulate.py @@ -54,7 +54,7 @@ from . import params from m5.util.dot_writer import do_dot, do_dvfs_dot from m5.util.dot_writer_ruby import do_ruby_dot -from .util import fatal +from .util import fatal, warn from .util import attrdict # define a MaxTick parameter, unsigned 64 bit @@ -205,6 +205,63 @@ def simulate(*args, **kwargs): return sim_out +def setMaxTick(tick: int) -> None: + """Sets the maximum tick the simulation may run to. When when using the + stdlib simulator module, reaching this max tick triggers a + `ExitEvent.MAX_TICK` exit event. + + :param tick: the maximum tick (absolute, not relative to the current tick). 
+ """ + if tick <= curTick(): + warn("Max tick scheduled for the past. This will not be triggered.") + _m5.event.setMaxTick(tick=tick) + + +def getMaxTick() -> int: + """Returns the current maximum tick.""" + return _m5.event.getMaxTick() + + +def getTicksUntilMax() -> int: + """Returns the current number of ticks until the maximum tick.""" + return getMaxTick() - curTick() + + +def scheduleTickExitFromCurrent( + ticks: int, exit_string: str = "Tick exit reached" +) -> None: + """Schedules a tick exit event from the current tick. I.e., if ticks == 100 + then an exit event will be scheduled at tick `curTick() + 100`. + + The default `exit_string` value is used by the stdlib Simulator module to + declare this exit event as `ExitEvent.SCHEDULED_TICK`. + + :param ticks: The simulation ticks, from `curTick()` to schedule the exit + event. + :param exit_string: The exit string to return when the exit event is + triggered. + """ + scheduleTickExitAbsolute(tick=ticks + curTick(), exit_string=exit_string) + + +def scheduleTickExitAbsolute( + tick: int, exit_string: str = "Tick exit reached" +) -> None: + """Schedules a tick exit event using absolute ticks. I.e., if tick == 100 + then an exit event will be scheduled at tick 100. + + The default `exit_string` value is used by the stdlib Simulator module to + declare this exit event as `ExitEvent.SCHEDULED_TICK`. + + :param tick: The absolute simulation tick to schedule the exit event. + :param exit_string: The exit string to return when the exit event is + triggered. + """ + if tick <= curTick(): + warn("Tick exit scheduled for the past. This will not be triggered.") + _m5.event.scheduleTickExit(tick=tick, exit_string=exit_string) + + def drain(): """Drain the simulator in preparation of a checkpoint or memory mode switch. 
diff --git a/src/python/pybind11/event.cc b/src/python/pybind11/event.cc index 7a02221611..827768f52f 100644 --- a/src/python/pybind11/event.cc +++ b/src/python/pybind11/event.cc @@ -107,6 +107,10 @@ pybind_init_event(py::module_ &m_native) m.def("simulate", &simulate, py::arg("ticks") = MaxTick); + m.def("setMaxTick", &set_max_tick, py::arg("tick")); + m.def("getMaxTick", &get_max_tick, py::return_value_policy::copy); + m.def("scheduleTickExit", &schedule_tick_exit, py::arg("tick"), + py::arg("exit_string")); m.def("terminateEventQueueThreads", &terminateEventQueueThreads); m.def("exitSimLoop", &exitSimLoop); m.def("getEventQueue", []() { return curEventQueue(); }, diff --git a/src/sim/simulate.cc b/src/sim/simulate.cc index 0c30f10570..f147b3ec77 100644 --- a/src/sim/simulate.cc +++ b/src/sim/simulate.cc @@ -180,8 +180,8 @@ struct DescheduleDeleter }; /** Simulate for num_cycles additional cycles. If num_cycles is -1 - * (the default), do not limit simulation; some other event must - * terminate the loop. Exported to Python. + * (the default), we simulate to MAX_TICKS unless the max ticks has been set + * via the 'set_max_tick' function prior. This function is exported to Python. * @return The SimLoopExitEvent that caused the loop to exit. */ GlobalSimLoopExitEvent *global_exit_event= nullptr; @@ -191,8 +191,6 @@ simulate(Tick num_cycles) if (global_exit_event)//cleaning last global exit event global_exit_event->clean(); std::unique_ptr quantum_event; - const Tick exit_tick = num_cycles < MaxTick - curTick() ? - curTick() + num_cycles : MaxTick; inform("Entering event queue @ %d. Starting simulation...\n", curTick()); @@ -200,11 +198,22 @@ simulate(Tick num_cycles) simulatorThreads.reset(new SimulatorThreads(numMainEventQueues)); if (!simulate_limit_event) { - simulate_limit_event = new GlobalSimLoopExitEvent( - mainEventQueue[0]->getCurTick(), - "simulate() limit reached", 0); + // If the simulate_limit_event is not set, we set it to MaxTick. 
+ set_max_tick(MaxTick); + } + + if (num_cycles != -1) { + // If the user has specified an exit event after X cycles, do so here. + // Note: This will override any prior set max_tick behaviour (such as + // that above when it is set to MAxTick). + const Tick max_tick = num_cycles < MaxTick - curTick() ? + curTick() + num_cycles : MaxTick; + + // This is kept to `set_max_tick` instead of `schedule_tick_exit` to + // preserve backwards functionality. It may be better to deprecate this + // behaviour at some point in favor of `schedule_tick_exit`. + set_max_tick(max_tick); } - simulate_limit_event->reschedule(exit_tick); if (numMainEventQueues > 1) { fatal_if(simQuantum == 0, @@ -234,6 +243,34 @@ simulate(Tick num_cycles) return global_exit_event; } +void set_max_tick(Tick tick) +{ + if (!simulate_limit_event) { + simulate_limit_event = new GlobalSimLoopExitEvent( + mainEventQueue[0]->getCurTick(), + "simulate() limit reached", 0); + } + simulate_limit_event->reschedule(tick); +} + + +Tick get_max_tick() +{ + if (!simulate_limit_event) { + /* If the GlobalSimLoopExitEvent has not been setup, the maximum tick + * is `MaxTick` as declared in "src/base/types.hh". + */ + return MaxTick; + } + + return simulate_limit_event->when(); +} + +void schedule_tick_exit(Tick tick, std::string exit_string) +{ + new GlobalSimLoopExitEvent(tick, exit_string, 0); +} + void terminateEventQueueThreads() { diff --git a/src/sim/simulate.hh b/src/sim/simulate.hh index 5ef499541f..e7c4fa640c 100644 --- a/src/sim/simulate.hh +++ b/src/sim/simulate.hh @@ -45,7 +45,37 @@ namespace gem5 class GlobalSimLoopExitEvent; -GlobalSimLoopExitEvent *simulate(Tick num_cycles = MaxTick); +GlobalSimLoopExitEvent *simulate(Tick num_cycles = -1); + +/** + * @brief Set the maximum tick. + * + * This function will schedule, or reschedule, the maximum tick for the + * simulation. + * + * This will setup the GlobalSimLoopExitEvent if it does not already exist. + * + * @param tick The maximum tick. 
+ */ +void set_max_tick(Tick tick); + +/** + * @brief Get the maximum simulation tick. + * + * + * @returns The maximum simulation tick. + */ +Tick get_max_tick(); + +/** + * @brief Schedule an exit event at a particular tick. + * + * Schedule a tick with a particular exit string. + * + * @param tick The tick at which the simulation loop should exit. + * @param exit_string The exit string explaining the exit. + */ +void schedule_tick_exit(Tick tick, std::string exit_string); /** * Terminate helper threads when running in parallel mode. diff --git a/tests/gem5/to_tick/configs/tick-exit.py b/tests/gem5/to_tick/configs/tick-exit.py new file mode 100644 index 0000000000..9b412cbfb6 --- /dev/null +++ b/tests/gem5/to_tick/configs/tick-exit.py @@ -0,0 +1,100 @@ +# Copyright (c) 2022 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" + +""" + +from gem5.resources.resource import Resource +from gem5.isas import ISA +from gem5.components.memory import SingleChannelDDR3_1600 +from gem5.components.boards.simple_board import SimpleBoard +from gem5.components.cachehierarchies.classic.no_cache import NoCache +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.simulate.simulator import Simulator +from gem5.simulate.exit_event import ExitEvent + +import m5 + +import argparse + +parser = argparse.ArgumentParser() + +parser.add_argument( + "-t", + "--tick-exits", + type=int, + nargs="+", + required=True, + help="Set the tick exits to exit.", +) + +parser.add_argument( + "-r", + "--resource-directory", + type=str, + required=False, + help="The directory in which resources will be downloaded or exist.", +) + +args = parser.parse_args() + +# Setup the system. 
+motherboard = SimpleBoard( + clk_freq="3GHz", + processor=SimpleProcessor( + cpu_type=CPUTypes.TIMING, + isa=ISA.X86, + num_cores=1, + ), + memory=SingleChannelDDR3_1600(), + cache_hierarchy=NoCache(), +) + +# Set the workload +binary = Resource( + "x86-hello64-static", resource_directory=args.resource_directory +) +motherboard.set_se_binary_workload(binary) + + +def scheduled_tick_generator(): + while True: + print(f"Exiting at: {m5.curTick()}") + yield False + + +# Run the simulation +simulator = Simulator( + board=motherboard, + on_exit_event={ExitEvent.SCHEDULED_TICK: scheduled_tick_generator()}, +) + +for tick in args.tick_exits: + m5.scheduleTickExitFromCurrent(tick) + +simulator.run() diff --git a/tests/gem5/to_tick/configs/tick-to-max.py b/tests/gem5/to_tick/configs/tick-to-max.py new file mode 100644 index 0000000000..2b679df412 --- /dev/null +++ b/tests/gem5/to_tick/configs/tick-to-max.py @@ -0,0 +1,123 @@ +# Copyright (c) 2022 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This configuration script is used to test running a simulation to a specified +maximum tick. This script was setup to test setting the number of ticks to +run before, at, or after the running of `simulator.run`. + +**Note:** There can only ever be one MAX_TICK exit event scheduled at any one +time. +""" + +from gem5.resources.resource import Resource +from gem5.isas import ISA +from gem5.components.memory import SingleChannelDDR3_1600 +from gem5.components.boards.simple_board import SimpleBoard +from gem5.components.cachehierarchies.classic.no_cache import NoCache +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.simulate.simulator import Simulator + +import m5 + +import argparse + +parser = argparse.ArgumentParser() + +parser.add_argument( + "-b", + "--set-ticks-before", + type=int, + required=False, + help="Set the number of ticks to run to prior to executing " + "`simulator.run`.", +) + +parser.add_argument( + "-e", + "--set-ticks-at-execution", + type=int, + required=False, + help="Set the number of ticks to run via `simulator.run`.", +) + +parser.add_argument( + "-a", + "--set-ticks-after", + type=int, + required=False, + help="Set the number of ticks to run after `simulator.run` has ceased " + "execution.", +) + +parser.add_argument( + "-r", + "--resource-directory", + type=str, + required=False, + help="The directory in which 
resources will be downloaded or exist.", +) + +args = parser.parse_args() + +# Setup the system. +motherboard = SimpleBoard( + clk_freq="3GHz", + processor=SimpleProcessor( + cpu_type=CPUTypes.TIMING, + isa=ISA.X86, + num_cores=1, + ), + memory=SingleChannelDDR3_1600(), + cache_hierarchy=NoCache(), +) + +# Set the workload +binary = Resource( + "x86-hello64-static", resource_directory=args.resource_directory +) +motherboard.set_se_binary_workload(binary) + +# Set the max ticks before setting up the simulation, if applicable. +if args.set_ticks_before: + m5.setMaxTick(args.set_ticks_before) + +# Run the simulation +simulator = Simulator(board=motherboard) + +if args.set_ticks_at_execution: + simulator.run(max_ticks=args.set_ticks_at_execution) +else: + simulator.run() + +# Set the max ticks after the simulator run. +if args.set_ticks_after: + m5.setMaxTick(args.set_ticks_after) + +print(f"Current Tick: {m5.curTick()}") +print(f"Current Max Tick: {m5.getMaxTick()}") +print(f"Ticks until max: {m5.getTicksUntilMax()}") diff --git a/tests/gem5/to_tick/ref/tick-exit-10-20-30-40.txt b/tests/gem5/to_tick/ref/tick-exit-10-20-30-40.txt new file mode 100644 index 0000000000..05f8159065 --- /dev/null +++ b/tests/gem5/to_tick/ref/tick-exit-10-20-30-40.txt @@ -0,0 +1,6 @@ +Global frequency set at 1000000000000 ticks per second +Exiting at: 10 +Exiting at: 20 +Exiting at: 30 +Exiting at: 40 +Hello world! diff --git a/tests/gem5/to_tick/ref/tick-exit-100.txt b/tests/gem5/to_tick/ref/tick-exit-100.txt new file mode 100644 index 0000000000..62f9330e13 --- /dev/null +++ b/tests/gem5/to_tick/ref/tick-exit-100.txt @@ -0,0 +1,3 @@ +Global frequency set at 1000000000000 ticks per second +Exiting at: 100 +Hello world! 
diff --git a/tests/gem5/to_tick/ref/tick-to-max-at-execution-100.txt b/tests/gem5/to_tick/ref/tick-to-max-at-execution-100.txt new file mode 100644 index 0000000000..1507716e42 --- /dev/null +++ b/tests/gem5/to_tick/ref/tick-to-max-at-execution-100.txt @@ -0,0 +1,4 @@ +Global frequency set at 1000000000000 ticks per second +Current Tick: 100 +Current Max Tick: 100 +Ticks until max: 0 diff --git a/tests/gem5/to_tick/ref/tick-to-max-at-execution-and-after-100-200.txt b/tests/gem5/to_tick/ref/tick-to-max-at-execution-and-after-100-200.txt new file mode 100644 index 0000000000..b1cde8ae4c --- /dev/null +++ b/tests/gem5/to_tick/ref/tick-to-max-at-execution-and-after-100-200.txt @@ -0,0 +1,4 @@ +Global frequency set at 1000000000000 ticks per second +Current Tick: 100 +Current Max Tick: 200 +Ticks until max: 100 diff --git a/tests/gem5/to_tick/ref/tick-to-max-before-execution-250.txt b/tests/gem5/to_tick/ref/tick-to-max-before-execution-250.txt new file mode 100644 index 0000000000..b26e9ebee2 --- /dev/null +++ b/tests/gem5/to_tick/ref/tick-to-max-before-execution-250.txt @@ -0,0 +1,4 @@ +Global frequency set at 1000000000000 ticks per second +Current Tick: 250 +Current Max Tick: 250 +Ticks until max: 0 diff --git a/tests/gem5/to_tick/test_to_tick.py b/tests/gem5/to_tick/test_to_tick.py new file mode 100644 index 0000000000..ba5bcbf9b9 --- /dev/null +++ b/tests/gem5/to_tick/test_to_tick.py @@ -0,0 +1,174 @@ +# Copyright (c) 2022 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from testlib import * + +if config.bin_path: + resource_path = config.bin_path +else: + resource_path = joinpath(absdirpath(__file__), "..", "resources") + +# This test sets the tick to max tick via the `simulator.run` function. This is +# set to 100. Therefore, at the end of the execution the expected current tick +# should be 100, with the max tick still 100. The number of expected ticks to +# max is therefore 0. 
+gem5_verify_config( + name="test-to-max-tick-at-execution-100", + verifiers=[ + verifier.MatchStdoutNoPerf( + joinpath(getcwd(), "ref", "tick-to-max-at-execution-100.txt") + ) + ], + fixtures=(), + config=joinpath( + config.base_dir, + "tests", + "gem5", + "to_tick", + "configs", + "tick-to-max.py", + ), + config_args=[ + "--resource-directory", + resource_path, + "--set-ticks-at-execution", + "100", + ], + valid_isas=(constants.all_compiled_tag,), + valid_hosts=constants.supported_hosts, + length=constants.quick_tag, +) + +# This test sets the max tick via the `simulator.run` function at tick 100. +# The `m5.setMaxTick` function is then called after, passing the value 200 . +# This means at the end of execution the current tick is 100, and the max tick +# is 200. The number of expected ticks to max is therefore 100. +gem5_verify_config( + name="test-to-max-tick-at-execution-and-after-100-200", + verifiers=[ + verifier.MatchStdoutNoPerf( + joinpath( + getcwd(), + "ref", + "tick-to-max-at-execution-and-after-100-200.txt", + ) + ) + ], + fixtures=(), + config=joinpath( + config.base_dir, + "tests", + "gem5", + "to_tick", + "configs", + "tick-to-max.py", + ), + config_args=[ + "--resource-directory", + resource_path, + "--set-ticks-at-execution", + "100", + "--set-ticks-after", + "200", + ], + valid_isas=(constants.all_compiled_tag,), + valid_hosts=constants.supported_hosts, + length=constants.quick_tag, +) + +# This test sets the max tick to 250 via the `m5.setMaxTick` prior to running +# `simulator.run`. This means at the end of execution the current tick is 250 +# and the max tick is 250. The expected number of ticks to max is therefore 0. 
+gem5_verify_config( + name="test-to-max-tick-before-execution-250", + verifiers=[ + verifier.MatchStdoutNoPerf( + joinpath(getcwd(), "ref", "tick-to-max-before-execution-250.txt") + ) + ], + fixtures=(), + config=joinpath( + config.base_dir, + "tests", + "gem5", + "to_tick", + "configs", + "tick-to-max.py", + ), + config_args=[ + "--resource-directory", + resource_path, + "--set-ticks-before", + "250", + ], + valid_isas=(constants.all_compiled_tag,), + valid_hosts=constants.supported_hosts, + length=constants.quick_tag, +) + +# Tests the scheduling of a tick exit event at tick 100. +gem5_verify_config( + name="test-to-tick-exit-100", + verifiers=[ + verifier.MatchStdoutNoPerf( + joinpath(getcwd(), "ref", "tick-exit-100.txt") + ) + ], + fixtures=(), + config=joinpath( + config.base_dir, "tests", "gem5", "to_tick", "configs", "tick-exit.py" + ), + config_args=["--resource-directory", resource_path, "--tick-exits", "100"], + valid_isas=(constants.all_compiled_tag,), + valid_hosts=constants.supported_hosts, + length=constants.quick_tag, +) + +# Tests the scheduling of a tick exit event at tick 10, 20, 30, and 40. 
+gem5_verify_config( + name="test-to-tick-exit-10-20-30-40", + verifiers=[ + verifier.MatchStdoutNoPerf( + joinpath(getcwd(), "ref", "tick-exit-10-20-30-40.txt") + ) + ], + fixtures=(), + config=joinpath( + config.base_dir, "tests", "gem5", "to_tick", "configs", "tick-exit.py" + ), + config_args=[ + "--resource-directory", + resource_path, + "--tick-exits", + "10", + "20", + "30", + "40", + ], + valid_isas=(constants.all_compiled_tag,), + valid_hosts=constants.supported_hosts, + length=constants.quick_tag, +) From da83764f9438ad0d81f0934567931d4a595f26dd Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Tue, 18 Oct 2022 16:04:18 -0700 Subject: [PATCH 034/492] stdlib, configs: Updating configs/example/gem5_library This commit updates all of the older tests in this directory to use the Simulator to run instead of m5.simulate() Change-Id: I2a81d5c2f27c89e8c03abb0203ca3e58a6688672 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64791 Reviewed-by: Bobby Bruce Tested-by: kokoro Maintainer: Bobby Bruce --- .../gem5_library/x86-gapbs-benchmarks.py | 101 +++--------- .../gem5_library/x86-npb-benchmarks.py | 145 ++++++------------ .../gem5_library/x86-parsec-benchmarks.py | 110 +++---------- .../x86-spec-cpu2006-benchmarks.py | 117 +++----------- .../x86-spec-cpu2017-benchmarks.py | 109 +++---------- 5 files changed, 144 insertions(+), 438 deletions(-) diff --git a/configs/example/gem5_library/x86-gapbs-benchmarks.py b/configs/example/gem5_library/x86-gapbs-benchmarks.py index bdc0d9427d..638d34b599 100644 --- a/configs/example/gem5_library/x86-gapbs-benchmarks.py +++ b/configs/example/gem5_library/x86-gapbs-benchmarks.py @@ -64,8 +64,8 @@ from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA from gem5.coherence_protocol import CoherenceProtocol from gem5.resources.resource import Resource - -from m5.stats.gem5stats import get_simstat +from gem5.simulate.simulator import Simulator +from gem5.simulate.exit_event import ExitEvent 
requires( isa_required=ISA.X86, @@ -210,14 +210,26 @@ board.set_kernel_disk_workload( readfile_contents=command, ) -root = Root(full_system=True, system=board) -# sim_quantum must be set when KVM cores are used. +def handle_exit(): + print("Done booting Linux") + print("Resetting stats at the start of ROI!") + m5.stats.reset() + global start_tick + start_tick = m5.curTick() + processor.switch() + yield False # E.g., continue the simulation. + print("Dump stats at the end of the ROI!") + m5.stats.dump() + yield True # Stop the simulation. We're done. -root.sim_quantum = int(1e9) -board._pre_instantiate() -m5.instantiate() +simulator = Simulator( + board=board, + on_exit_event={ + ExitEvent.EXIT: handle_exit(), + }, +) # We maintain the wall clock time. @@ -232,74 +244,8 @@ print("Using KVM cpu") # the first ROI annotation in details. The X86Board currently does not support # `work items started count reached`. -exit_event = m5.simulate() - -# The first exit_event ends with a `workbegin` cause. This means that the -# system started successfully and the execution on the program started. The -# ROI begin is encountered. - -if exit_event.getCause() == "workbegin": - - print("Done booting Linux") - print("Resetting stats at the start of ROI!") - - m5.stats.reset() - start_tick = m5.curTick() - - # We have completed up to this step using KVM cpu. Now we switch to timing - # cpu for detailed simulation. - - processor.switch() -else: - print("Unexpected termination of simulation before ROI was reached!") - print( - "Exiting @ tick {} because {}.".format( - m5.curTick(), exit_event.getCause() - ) - ) - exit(-1) - -# The next exit_event is to simulate the ROI. It should be exited with a cause -# marked by `workend`. This implies that the first annotation is successfully -# completed. - -exit_event = m5.simulate() - -# Reached the end of first ROI. -# We dump the stats here. - -# We exepect that ROI ends with `workend`. Otherwise the simulation ended -# unexpectedly. 
-if exit_event.getCause() == "workend": - print("Dump stats at the end of the ROI!") - - m5.stats.dump() - end_tick = m5.curTick() -else: - print("Unexpected termination of simulation while ROI was being executed!") - print( - "Exiting @ tick {} because {}.".format( - m5.curTick(), exit_event.getCause() - ) - ) - exit(-1) - -# We get simInsts using get_simstat and output it in the final print statement. - -gem5stats = get_simstat(root) - -# We get the number of committed instructions from the timing cores. We then -# sum and print them at the end. - -roi_insts = float( - gem5stats.to_json()["system"]["processor"]["switch0"]["core"][ - "exec_context.thread_0" - ]["numInsts"]["value"] -) + float( - gem5stats.to_json()["system"]["processor"]["switch1"]["core"][ - "exec_context.thread_0" - ]["numInsts"]["value"] -) +simulator.run() +end_tick = m5.curTick() # Since we simulated the ROI in details, therefore, simulation is over at this # point. @@ -313,8 +259,9 @@ print() print("Performance statistics:") print("Simulated time in ROI: %.2fs" % ((end_tick - start_tick) / 1e12)) -print("Instructions executed in ROI: %d" % ((roi_insts))) -print("Ran a total of", m5.curTick() / 1e12, "simulated seconds") +print( + "Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds" +) print( "Total wallclock time: %.2fs, %.2f min" % (time.time() - globalStart, (time.time() - globalStart) / 60) diff --git a/configs/example/gem5_library/x86-npb-benchmarks.py b/configs/example/gem5_library/x86-npb-benchmarks.py index 385760c7a7..2cb314303f 100644 --- a/configs/example/gem5_library/x86-npb-benchmarks.py +++ b/configs/example/gem5_library/x86-npb-benchmarks.py @@ -61,6 +61,8 @@ from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA from gem5.coherence_protocol import CoherenceProtocol from gem5.resources.resource import Resource +from gem5.simulate.simulator import Simulator +from gem5.simulate.simulator import ExitEvent from m5.stats.gem5stats 
import get_simstat from m5.util import warn @@ -209,17 +211,47 @@ board.set_kernel_disk_workload( readfile_contents=command, ) -# We need this for long running processes. -m5.disableAllListeners() +# The first exit_event ends with a `workbegin` cause. This means that the +# system started successfully and the execution on the program started. +def handle_workbegin(): + print("Done booting Linux") + print("Resetting stats at the start of ROI!") -root = Root(full_system=True, system=board) + m5.stats.reset() -# sim_quantum must be set when KVM cores are used. + # We have completed up to this step using KVM cpu. Now we switch to timing + # cpu for detailed simulation. -root.sim_quantum = int(1e9) + # # Next, we need to check if the user passed a value for --ticks. If yes, + # then we limit out execution to this number of ticks during the ROI. + # Otherwise, we simulate until the ROI ends. + processor.switch() + if args.ticks: + # schedule an exit event for this amount of ticks in the future. + # The simulation will then continue. + m5.scheduleTickExitFromCurrent(args.ticks) + yield False -board._pre_instantiate() -m5.instantiate() + +# The next exit_event is to simulate the ROI. It should be exited with a cause +# marked by `workend`. + +# We exepect that ROI ends with `workend` or `simulate() limit reached`. +# Otherwise the simulation ended unexpectedly. +def handle_workend(): + print("Dump stats at the end of the ROI!") + + m5.stats.dump() + yield False + + +simulator = Simulator( + board=board, + on_exit_event={ + ExitEvent.WORKBEGIN: handle_workbegin(), + ExitEvent.WORKEND: handle_workend(), + }, +) # We maintain the wall clock time. @@ -229,96 +261,12 @@ print("Running the simulation") print("Using KVM cpu") # We start the simulation. - -exit_event = m5.simulate() - -# The first exit_event ends with a `workbegin` cause. This means that the -# system started successfully and the execution on the program started. 
- -if exit_event.getCause() == "workbegin": - - print("Done booting Linux") - print("Resetting stats at the start of ROI!") - - m5.stats.reset() - start_tick = m5.curTick() - - # We have completed up to this step using KVM cpu. Now we switch to timing - # cpu for detailed simulation. - - processor.switch() -else: - # `workbegin` call was never encountered. - - print("Unexpected termination of simulation before ROI was reached!") - print( - "Exiting @ tick {} because {}.".format( - m5.curTick(), exit_event.getCause() - ) - ) - exit(-1) - -# The next exit_event is to simulate the ROI. It should be exited with a cause -# marked by `workend`. - -# Next, we need to check if the user passed a value for --ticks. If yes, -# then we limit out execution to this number of ticks during the ROI. -# Otherwise, we simulate until the ROI ends. -if args.ticks: - exit_event = m5.simulate(args.ticks) -else: - exit_event = m5.simulate() - - -# Reached the end of ROI. -# We dump the stats here. - -# We exepect that ROI ends with `workend` or `simulate() limit reached`. -# Otherwise the simulation ended unexpectedly. -if exit_event.getCause() == "workend": - print("Dump stats at the end of the ROI!") - - m5.stats.dump() - end_tick = m5.curTick() -elif ( - exit_event.getCause() == "simulate() limit reached" - and args.ticks is not None -): - print("Dump stats at the end of {} ticks in the ROI".format(args.ticks)) - - m5.stats.dump() - end_tick = m5.curTick() -else: - print("Unexpected termination of simulation while ROI was being executed!") - print( - "Exiting @ tick {} because {}.".format( - m5.curTick(), exit_event.getCause() - ) - ) - exit(-1) +simulator.run() # We need to note that the benchmark is not executed completely till this # point, but, the ROI has. We collect the essential statistics here before # resuming the simulation again. -# We get simInsts using get_simstat and output it in the final -# print statement. 
- -gem5stats = get_simstat(root) - -# We get the number of committed instructions from the timing -# cores. We then sum and print them at the end. - -roi_insts = float( - gem5stats.to_json()["system"]["processor"]["switch0"]["core"][ - "exec_context.thread_0" - ]["numInsts"]["value"] -) + float( - gem5stats.to_json()["system"]["processor"]["switch1"]["core"][ - "exec_context.thread_0" - ]["numInsts"]["value"] -) - # Simulation is over at this point. We acknowledge that all the simulation # events were successful. print("All simulation events were successful.") @@ -328,9 +276,16 @@ print("Done with the simulation") print() print("Performance statistics:") -print("Simulated time in ROI: %.2fs" % ((end_tick - start_tick) / 1e12)) -print("Instructions executed in ROI: %d" % ((roi_insts))) -print("Ran a total of", m5.curTick() / 1e12, "simulated seconds") +# manually calculate ROI time if ticks arg is used in case the +# entire ROI wasn't simulated +if args.ticks: + print(f"Simulated time in ROI (to tick): {args.ticks/ 1e12}s") +else: + print(f"Simulated time in ROI: {simulator.get_roi_ticks()[0] / 1e12}s") + +print( + f"Ran a total of {simulator.get_current_tick() / 1e12} simulated seconds" +) print( "Total wallclock time: %.2fs, %.2f min" % (time.time() - globalStart, (time.time() - globalStart) / 60) diff --git a/configs/example/gem5_library/x86-parsec-benchmarks.py b/configs/example/gem5_library/x86-parsec-benchmarks.py index 82183802c7..190c0a0980 100644 --- a/configs/example/gem5_library/x86-parsec-benchmarks.py +++ b/configs/example/gem5_library/x86-parsec-benchmarks.py @@ -60,8 +60,8 @@ from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA from gem5.coherence_protocol import CoherenceProtocol from gem5.resources.resource import Resource - -from m5.stats.gem5stats import get_simstat +from gem5.simulate.simulator import Simulator +from gem5.simulate.exit_event import ExitEvent # We check for the required gem5 build. 
@@ -195,17 +195,28 @@ board.set_kernel_disk_workload( readfile_contents=command, ) -# We need this for long running processes. -m5.disableAllListeners() +# functions to handle different exit events during the simuation +def handle_workbegin(): + print("Done booting Linux") + print("Resetting stats at the start of ROI!") + m5.stats.reset() + processor.switch() + yield False -root = Root(full_system=True, system=board) -# sim_quantum must be set if KVM cores are used. +def handle_workend(): + print("Dump stats at the end of the ROI!") + m5.stats.dump() + yield True -root.sim_quantum = int(1e9) -board._pre_instantiate() -m5.instantiate() +simulator = Simulator( + board=board, + on_exit_event={ + ExitEvent.WORKBEGIN: handle_workbegin(), + ExitEvent.WORKEND: handle_workend(), + }, +) # We maintain the wall clock time. @@ -214,83 +225,11 @@ globalStart = time.time() print("Running the simulation") print("Using KVM cpu") -start_tick = m5.curTick() -end_tick = m5.curTick() m5.stats.reset() # We start the simulation +simulator.run() -exit_event = m5.simulate() - -# The first exit_event ends with a `workbegin` cause. This means that the -# system booted successfully and the execution on the program started. - -if exit_event.getCause() == "workbegin": - - print("Done booting Linux") - print("Resetting stats at the start of ROI!") - - m5.stats.reset() - start_tick = m5.curTick() - - # We have completed up to this step using KVM cpu. Now we switch to timing - # cpu for detailed simulation. - - processor.switch() -else: - # `workbegin` call was never encountered. - - print("Unexpected termination of simulation before ROI was reached!") - print( - "Exiting @ tick {} because {}.".format( - m5.curTick(), exit_event.getCause() - ) - ) - exit(-1) - -# The next exit_event is to simulate the ROI. It should be exited with a cause -# marked by `workend`. - -exit_event = m5.simulate() - -# Reached the end of ROI. -# We dump the stats here. - -# We exepect that ROI ends with `workend`. 
Otherwise the simulation ended -# unexpectedly. -if exit_event.getCause() == "workend": - print("Dump stats at the end of the ROI!") - - m5.stats.dump() - end_tick = m5.curTick() -else: - print("Unexpected termination of simulation while ROI was being executed!") - print( - "Exiting @ tick {} because {}.".format( - m5.curTick(), exit_event.getCause() - ) - ) - exit(-1) - -# ROI has ended here, and we get `simInsts` using get_simstat and print it in -# the final print statement. - -gem5stats = get_simstat(root) - -# We get the number of committed instructions from the timing -# cores. We then sum and print them at the end. -roi_insts = float( - gem5stats.to_json()["system"]["processor"]["switch0"]["core"][ - "exec_context.thread_0" - ]["numInsts"]["value"] -) + float( - gem5stats.to_json()["system"]["processor"]["switch1"]["core"][ - "exec_context.thread_0" - ]["numInsts"]["value"] -) - -# Simulation is over at this point. We acknowledge that all the simulation -# events were successful. print("All simulation events were successful.") # We print the final simulation statistics. 
@@ -299,9 +238,10 @@ print("Done with the simulation") print() print("Performance statistics:") -print("Simulated time in ROI: %.2fs" % ((end_tick - start_tick) / 1e12)) -print("Instructions executed in ROI: %d" % ((roi_insts))) -print("Ran a total of", m5.curTick() / 1e12, "simulated seconds") +print("Simulated time in ROI: " + ((str(simulator.get_roi_ticks()[0])))) +print( + "Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds" +) print( "Total wallclock time: %.2fs, %.2f min" % (time.time() - globalStart, (time.time() - globalStart) / 60) diff --git a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py index d656e61145..8f39f49e2e 100644 --- a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py +++ b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py @@ -66,6 +66,8 @@ from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA from gem5.coherence_protocol import CoherenceProtocol from gem5.resources.resource import Resource, CustomDiskImageResource +from gem5.simulate.simulator import Simulator +from gem5.simulate.exit_event import ExitEvent from m5.stats.gem5stats import get_simstat from m5.util import warn @@ -265,17 +267,23 @@ board.set_kernel_disk_workload( readfile_contents=command, ) -# We need this for long running processes. -m5.disableAllListeners() -root = Root(full_system=True, system=board) +def handle_exit(): + print("Done bootling Linux") + print("Resetting stats at the start of ROI!") + m5.stats.reset() + yield False # E.g., continue the simulation. + print("Dump stats at the end of the ROI!") + m5.stats.dump() + yield True # Stop the simulation. We're done. -# sim_quantum must be set when KVM cores are used. -root.sim_quantum = int(1e9) - -board._pre_instantiate() -m5.instantiate() +simulator = Simulator( + board=board, + on_exit_event={ + ExitEvent.EXIT: handle_exit(), + }, +) # We maintain the wall clock time. 
@@ -284,92 +292,10 @@ globalStart = time.time() print("Running the simulation") print("Using KVM cpu") -start_tick = m5.curTick() -end_tick = m5.curTick() m5.stats.reset() -exit_event = m5.simulate() - -if exit_event.getCause() == "m5_exit instruction encountered": - # We have completed booting the OS using KVM cpu - # Reached the start of ROI - - print("Done booting Linux") - print("Resetting stats at the start of ROI!") - - m5.stats.reset() - start_tick = m5.curTick() - - # We switch to timing cpu for detailed simulation. - - processor.switch() -else: - # `m5_exit instruction encountered` was never reached - - print("Unexpected termination of simulation before ROI was reached!") - print( - "Exiting @ tick {} because {}.".format( - m5.curTick(), exit_event.getCause() - ) - ) - exit(-1) - -# Simulate the ROI -exit_event = m5.simulate() - -# Reached the end of ROI -gem5stats = get_simstat(root) - -# We get the number of committed instructions from the timing -# cores. We then sum and print them at the end. 
- -roi_insts = float( - json.loads(gem5stats.dumps())["system"]["processor"]["cores2"]["core"][ - "exec_context.thread_0" - ]["numInsts"]["value"] -) + float( - json.loads(gem5stats.dumps())["system"]["processor"]["cores3"]["core"][ - "exec_context.thread_0" - ]["numInsts"]["value"] -) - -if exit_event.getCause() == "m5_exit instruction encountered": - print("Dump stats at the end of the ROI!") - m5.stats.dump() - end_tick = m5.curTick() - m5.stats.reset() - -else: - # `m5_exit instruction encountered` was never reached - - print("Unexpected termination of simulation while ROI was being executed!") - print( - "Exiting @ tick {} because {}.".format( - m5.curTick(), exit_event.getCause() - ) - ) - exit(-1) - -# We need to copy back the contents of the `speclogs' directory to -# m5.options.outdir - -exit_event = m5.simulate() - -if exit_event.getCause() == "m5_exit instruction encountered": - print("Output logs copied!") - -else: - print("Unexpected termination of simulation while copying speclogs!") - print( - "Exiting @ tick {} because {}.".format( - m5.curTick(), exit_event.getCause() - ) - ) - exit(-1) - -m5.stats.dump() -end_tick = m5.curTick() -m5.stats.reset() +# We start the simulation +simulator.run() # Simulation is over at this point. We acknowledge that all the simulation # events were successful. 
@@ -378,9 +304,10 @@ print("All simulation events were successful.") print("Performance statistics:") -print("Simulated time: %.2fs" % ((end_tick - start_tick) / 1e12)) -print("Instructions executed: %d" % ((roi_insts))) -print("Ran a total of", m5.curTick() / 1e12, "simulated seconds") +print("Simulated time: " + ((str(simulator.get_roi_ticks()[0])))) +print( + "Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds" +) print( "Total wallclock time: %.2fs, %.2f min" % (time.time() - globalStart, (time.time() - globalStart) / 60) diff --git a/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py index 2bc948aea1..c4af7f5dd9 100644 --- a/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py +++ b/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py @@ -64,6 +64,8 @@ from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA from gem5.coherence_protocol import CoherenceProtocol from gem5.resources.resource import Resource, CustomDiskImageResource +from gem5.simulate.simulator import Simulator +from gem5.simulate.exit_event import ExitEvent from m5.stats.gem5stats import get_simstat from m5.util import warn @@ -281,17 +283,23 @@ board.set_kernel_disk_workload( readfile_contents=command, ) -# We need this for long running processes. -m5.disableAllListeners() -root = Root(full_system=True, system=board) +def handle_exit(): + print("Done bootling Linux") + print("Resetting stats at the start of ROI!") + m5.stats.reset() + yield False # E.g., continue the simulation. + print("Dump stats at the end of the ROI!") + m5.stats.dump() + yield True # Stop the simulation. We're done. -# sim_quantum must be set when KVM cores are used. -root.sim_quantum = int(1e9) - -board._pre_instantiate() -m5.instantiate() +simulator = Simulator( + board=board, + on_exit_event={ + ExitEvent.EXIT: handle_exit(), + }, +) # We maintain the wall clock time. 
@@ -300,92 +308,21 @@ globalStart = time.time() print("Running the simulation") print("Using KVM cpu") -start_tick = m5.curTick() -end_tick = m5.curTick() m5.stats.reset() -exit_event = m5.simulate() +# We start the simulation +simulator.run() -if exit_event.getCause() == "m5_exit instruction encountered": - # We have completed booting the OS using KVM cpu - # Reached the start of ROI - - print("Done booting Linux") - print("Resetting stats at the start of ROI!") - - m5.stats.reset() - start_tick = m5.curTick() - - # We switch to timing cpu for detailed simulation. - - processor.switch() -else: - print("Unexpected termination of simulation before ROI was reached!") - print( - "Exiting @ tick {} because {}.".format( - m5.curTick(), exit_event.getCause() - ) - ) - exit(-1) - -# Simulate the ROI -exit_event = m5.simulate() - -# Reached the end of ROI -gem5stats = get_simstat(root) - -# We get the number of committed instructions from the timing -# cores. We then sum and print them at the end. 
- -roi_insts = float( - json.loads(gem5stats.dumps())["system"]["processor"]["cores2"]["core"][ - "exec_context.thread_0" - ]["numInsts"]["value"] -) + float( - json.loads(gem5stats.dumps())["system"]["processor"]["cores3"]["core"][ - "exec_context.thread_0" - ]["numInsts"]["value"] -) - -if exit_event.getCause() == "m5_exit instruction encountered": - print("Dump stats at the end of the ROI!") - m5.stats.dump() - end_tick = m5.curTick() - m5.stats.reset() - -else: - print("Unexpected termination of simulation while ROI was being executed!") - print( - "Exiting @ tick {} because {}.".format( - m5.curTick(), exit_event.getCause() - ) - ) - exit(-1) - -# We need to copy back the contents of the `speclogs' directory to -# m5.options.outdir - -exit_event = m5.simulate() - -if exit_event.getCause() == "m5_exit instruction encountered": - print("Output logs copied!") - -else: - print("Unexpected termination of simulation while copying speclogs!") - print( - "Exiting @ tick {} because {}.".format( - m5.curTick(), exit_event.getCause() - ) - ) - exit(-1) +# We print the final simulation statistics. print("Done with the simulation") print() print("Performance statistics:") -print("Simulated time in ROI: %.2fs" % ((end_tick - start_tick) / 1e12)) -print("Instructions executed in ROI: %d" % ((roi_insts))) -print("Ran a total of", m5.curTick() / 1e12, "simulated seconds") +print("Simulated time in ROI: " + ((str(simulator.get_roi_ticks()[0])))) +print( + "Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds" +) print( "Total wallclock time: %.2fs, %.2f min" % (time.time() - globalStart, (time.time() - globalStart) / 60) From 749c4779f4644b6660ba19636ba69ab081e33222 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Mon, 28 Nov 2022 07:21:14 +0000 Subject: [PATCH 035/492] arch-riscv: Add basic features toward rv32 support Various changes to support rv32: 1. Add riscv_bits field into RiscvISA to switch rv_type 2. Add rv_type field into ExtMachInst 3. 
Split various constants into rv32/rv64 version 4. Fix mcause/mstatus/misa setting per rv_type 5. Split RiscvCPU into rv32/rv64 6. Fix how reset/branch create new pc so rv_type is preserved 7. Tag gdb-xml only for rv64 TODO: Add rv32 gdb-xml Add rv32 implementation into decoder Currently there're three places where we store the rv_type information (1) ISA (2) PCState (3) ExtMachInst. In theory, the ISA should be the source of truth, and propagate information into PCState, then Inst. However, there is an API on RiscvProcess that lets users modify the rv_type in PCState, so there's a chance of getting an inconsistent rv_type. We should either modify the structure so that this kind of usage is well supported, or just prohibit people from setting a different rv_type. Change-Id: If5685ae60f8d18f4f2e18137e235989e63156404 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/63091 Reviewed-by: Yu-hsin Wang Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/arch/riscv/RiscvCPU.py | 29 ++++ src/arch/riscv/RiscvISA.py | 6 + src/arch/riscv/SConscript | 3 +- src/arch/riscv/decoder.cc | 1 + src/arch/riscv/faults.cc | 9 +- src/arch/riscv/gdb-xml/SConscript | 8 +- .../gdb-xml/{riscv.xml => riscv-64bit.xml} | 0 src/arch/riscv/insts/standard.hh | 4 +- src/arch/riscv/insts/unknown.hh | 2 +- src/arch/riscv/isa.cc | 44 ++++-- src/arch/riscv/isa.hh | 9 +- src/arch/riscv/isa/bitfields.isa | 1 + src/arch/riscv/isa/formats/basic.isa | 4 +- src/arch/riscv/isa/formats/compressed.isa | 2 +- src/arch/riscv/isa/formats/standard.isa | 14 +- src/arch/riscv/pcstate.hh | 20 ++- src/arch/riscv/process.cc | 2 +- src/arch/riscv/regs/misc.hh | 126 ++++++++++++------ src/arch/riscv/remote_gdb.cc | 89 +++++++------ src/arch/riscv/remote_gdb.hh | 4 +- src/arch/riscv/types.hh | 9 +- 21 files changed, 265 insertions(+), 121 deletions(-) rename src/arch/riscv/gdb-xml/{riscv.xml => riscv-64bit.xml} (100%) diff --git a/src/arch/riscv/RiscvCPU.py b/src/arch/riscv/RiscvCPU.py
index 1c77045c67..678c3295c6 100644 --- a/src/arch/riscv/RiscvCPU.py +++ b/src/arch/riscv/RiscvCPU.py @@ -23,6 +23,8 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import functools + from m5.objects.BaseAtomicSimpleCPU import BaseAtomicSimpleCPU from m5.objects.BaseNonCachingSimpleCPU import BaseNonCachingSimpleCPU from m5.objects.BaseTimingSimpleCPU import BaseTimingSimpleCPU @@ -41,6 +43,13 @@ class RiscvCPU: ArchISA = RiscvISA +class Riscv32CPU: + ArchDecoder = RiscvDecoder + ArchMMU = RiscvMMU + ArchInterrupts = RiscvInterrupts + ArchISA = functools.partial(RiscvISA, riscv_type="RV32") + + class RiscvAtomicSimpleCPU(BaseAtomicSimpleCPU, RiscvCPU): mmu = RiscvMMU() @@ -59,3 +68,23 @@ class RiscvO3CPU(BaseO3CPU, RiscvCPU): class RiscvMinorCPU(BaseMinorCPU, RiscvCPU): mmu = RiscvMMU() + + +class Riscv32AtomicSimpleCPU(BaseAtomicSimpleCPU, Riscv32CPU): + mmu = RiscvMMU() + + +class Riscv32NonCachingSimpleCPU(BaseNonCachingSimpleCPU, Riscv32CPU): + mmu = RiscvMMU() + + +class Riscv32TimingSimpleCPU(BaseTimingSimpleCPU, Riscv32CPU): + mmu = RiscvMMU() + + +class Riscv32O3CPU(BaseO3CPU, Riscv32CPU): + mmu = RiscvMMU() + + +class Riscv32MinorCPU(BaseMinorCPU, Riscv32CPU): + mmu = RiscvMMU() diff --git a/src/arch/riscv/RiscvISA.py b/src/arch/riscv/RiscvISA.py index ee98a5b95d..e2381fd158 100644 --- a/src/arch/riscv/RiscvISA.py +++ b/src/arch/riscv/RiscvISA.py @@ -38,10 +38,15 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+from m5.params import Enum from m5.params import Param from m5.objects.BaseISA import BaseISA +class RiscvType(Enum): + vals = ["RV32", "RV64"] + + class RiscvISA(BaseISA): type = "RiscvISA" cxx_class = "gem5::RiscvISA::ISA" @@ -50,3 +55,4 @@ class RiscvISA(BaseISA): check_alignment = Param.Bool( False, "whether to check memory access alignment" ) + riscv_type = Param.RiscvType("RV64", "RV32 or RV64") diff --git a/src/arch/riscv/SConscript b/src/arch/riscv/SConscript index dd4e9aed17..924bba5915 100644 --- a/src/arch/riscv/SConscript +++ b/src/arch/riscv/SConscript @@ -70,7 +70,8 @@ SimObject('RiscvFsWorkload.py', sim_objects=['RiscvBareMetal', 'RiscvLinux'], tags='riscv isa') SimObject('RiscvInterrupts.py', sim_objects=['RiscvInterrupts'], tags='riscv isa') -SimObject('RiscvISA.py', sim_objects=['RiscvISA'], tags='riscv isa') +SimObject('RiscvISA.py', sim_objects=['RiscvISA'], + enums=['RiscvType'], tags='riscv isa') SimObject('RiscvMMU.py', sim_objects=['RiscvMMU'], tags='riscv isa') SimObject('RiscvSeWorkload.py', sim_objects=[ 'RiscvSEWorkload', 'RiscvEmuLinux'], tags='riscv isa') diff --git a/src/arch/riscv/decoder.cc b/src/arch/riscv/decoder.cc index a02415fae0..b816c17b21 100644 --- a/src/arch/riscv/decoder.cc +++ b/src/arch/riscv/decoder.cc @@ -111,6 +111,7 @@ Decoder::decode(PCStateBase &_next_pc) next_pc.compressed(false); } + emi.rv_type = static_cast(next_pc.rvType()); return decode(emi, next_pc.instAddr()); } diff --git a/src/arch/riscv/faults.cc b/src/arch/riscv/faults.cc index e609222b07..3469c71252 100644 --- a/src/arch/riscv/faults.cc +++ b/src/arch/riscv/faults.cc @@ -135,10 +135,9 @@ RiscvFault::invoke(ThreadContext *tc, const StaticInstPtr &inst) } // Set fault cause, privilege, and return PC - // Interrupt is indicated on the MSB of cause (bit 63 in RV64) uint64_t _cause = _code; if (isInterrupt()) { - _cause |= (1L << 63); + _cause |= CAUSE_INTERRUPT_MASKS[pc_state.rvType()]; } tc->setMiscReg(cause, _cause); tc->setMiscReg(epc, 
tc->pcState().instAddr()); @@ -177,8 +176,10 @@ Reset::invoke(ThreadContext *tc, const StaticInstPtr &inst) // Advance the PC to the implementation-defined reset vector auto workload = dynamic_cast(tc->getSystemPtr()->workload); - PCState pc(workload->getEntry()); - tc->pcState(pc); + std::unique_ptr new_pc(dynamic_cast( + tc->getIsaPtr()->newPCState(workload->getEntry()))); + panic_if(!new_pc, "Failed create new PCState from ISA pointer"); + tc->pcState(*new_pc); } void diff --git a/src/arch/riscv/gdb-xml/SConscript b/src/arch/riscv/gdb-xml/SConscript index a733b1eb0e..722137408b 100644 --- a/src/arch/riscv/gdb-xml/SConscript +++ b/src/arch/riscv/gdb-xml/SConscript @@ -43,7 +43,7 @@ Import('*') -GdbXml('riscv.xml', 'gdb_xml_riscv_target', tags='riscv isa') -GdbXml('riscv-64bit-cpu.xml', 'gdb_xml_riscv_cpu', tags='riscv isa') -GdbXml('riscv-64bit-fpu.xml', 'gdb_xml_riscv_fpu', tags='riscv isa') -GdbXml('riscv-64bit-csr.xml', 'gdb_xml_riscv_csr', tags='riscv isa') +GdbXml('riscv-64bit.xml', 'gdb_xml_riscv_64bit_target', tags='riscv isa') +GdbXml('riscv-64bit-cpu.xml', 'gdb_xml_riscv_64bit_cpu', tags='riscv isa') +GdbXml('riscv-64bit-fpu.xml', 'gdb_xml_riscv_64bit_fpu', tags='riscv isa') +GdbXml('riscv-64bit-csr.xml', 'gdb_xml_riscv_64bit_csr', tags='riscv isa') diff --git a/src/arch/riscv/gdb-xml/riscv.xml b/src/arch/riscv/gdb-xml/riscv-64bit.xml similarity index 100% rename from src/arch/riscv/gdb-xml/riscv.xml rename to src/arch/riscv/gdb-xml/riscv-64bit.xml diff --git a/src/arch/riscv/insts/standard.hh b/src/arch/riscv/insts/standard.hh index be3470fda4..5b0e8c2c22 100644 --- a/src/arch/riscv/insts/standard.hh +++ b/src/arch/riscv/insts/standard.hh @@ -66,7 +66,7 @@ class ImmOp : public RiscvStaticInst protected: I imm; - ImmOp(const char *mnem, MachInst _machInst, OpClass __opClass) + ImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : RiscvStaticInst(mnem, _machInst, __opClass), imm(0) {} }; @@ -93,7 +93,7 @@ class CSROp : public RiscvStaticInst 
uint64_t uimm; /// Constructor - CSROp(const char *mnem, MachInst _machInst, OpClass __opClass) + CSROp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : RiscvStaticInst(mnem, _machInst, __opClass), csr(FUNCT12), uimm(CSRIMM) { diff --git a/src/arch/riscv/insts/unknown.hh b/src/arch/riscv/insts/unknown.hh index a271eb98b0..0c2f75e1e9 100644 --- a/src/arch/riscv/insts/unknown.hh +++ b/src/arch/riscv/insts/unknown.hh @@ -53,7 +53,7 @@ namespace RiscvISA class Unknown : public RiscvStaticInst { public: - Unknown(MachInst _machInst) + Unknown(ExtMachInst _machInst) : RiscvStaticInst("unknown", _machInst, No_OpClass) {} diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index c76bb2bdf3..c8eabd44ad 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -204,7 +204,7 @@ RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace ISA::ISA(const Params &p) : - BaseISA(p), checkAlignment(p.check_alignment) + BaseISA(p), rv_type(p.riscv_type), checkAlignment(p.check_alignment) { _regClasses.push_back(&intRegClass); _regClasses.push_back(&floatRegClass); @@ -243,12 +243,24 @@ void ISA::clear() std::fill(miscRegFile.begin(), miscRegFile.end(), 0); miscRegFile[MISCREG_PRV] = PRV_M; - miscRegFile[MISCREG_ISA] = (2ULL << MXL_OFFSET) | 0x14112D; miscRegFile[MISCREG_VENDORID] = 0; miscRegFile[MISCREG_ARCHID] = 0; miscRegFile[MISCREG_IMPID] = 0; - miscRegFile[MISCREG_STATUS] = (2ULL << UXL_OFFSET) | (2ULL << SXL_OFFSET) | - (1ULL << FS_OFFSET); + // rv_type dependent init. 
+ switch (rv_type) { + case RV32: + miscRegFile[MISCREG_ISA] = (1ULL << MXL_OFFSETS[RV32]) | 0x14112D; + miscRegFile[MISCREG_STATUS] = (1ULL << FS_OFFSET); + break; + case RV64: + miscRegFile[MISCREG_ISA] = (2ULL << MXL_OFFSETS[RV64]) | 0x14112D; + miscRegFile[MISCREG_STATUS] = (2ULL << UXL_OFFSET) | + (2ULL << SXL_OFFSET) | + (1ULL << FS_OFFSET); + break; + default: + panic("%s: Unknown rv_type: %d", name(), (int)rv_type); + } miscRegFile[MISCREG_MCOUNTEREN] = 0x7; miscRegFile[MISCREG_SCOUNTEREN] = 0x7; // don't set it to zero; software may try to determine the supported @@ -365,8 +377,18 @@ ISA::readMiscReg(RegIndex idx) STATUS status = readMiscRegNoEffect(idx); uint64_t sd_bit = \ (status.xs == 3) || (status.fs == 3) || (status.vs == 3); - // We assume RV64 here, updating the SD bit at index 63. - status.sd = sd_bit; + // For RV32, the SD bit is at index 31 + // For RV64, the SD bit is at index 63. + switch (rv_type) { + case RV32: + status.rv32_sd = sd_bit; + break; + case RV64: + status.rv64_sd = sd_bit; + break; + default: + panic("%s: Unknown rv_type: %d", name(), (int)rv_type); + } setMiscRegNoEffect(idx, status); return readMiscRegNoEffect(idx); @@ -506,10 +528,12 @@ ISA::setMiscReg(RegIndex idx, RegVal val) break; case MISCREG_STATUS: { - // SXL and UXL are hard-wired to 64 bit - auto cur = readMiscRegNoEffect(idx); - val &= ~(STATUS_SXL_MASK | STATUS_UXL_MASK); - val |= cur & (STATUS_SXL_MASK | STATUS_UXL_MASK); + if (rv_type != RV32) { + // SXL and UXL are hard-wired to 64 bit + auto cur = readMiscRegNoEffect(idx); + val &= ~(STATUS_SXL_MASK | STATUS_UXL_MASK); + val |= cur & (STATUS_SXL_MASK | STATUS_UXL_MASK); + } setMiscRegNoEffect(idx, val); } break; diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh index 97a05814fe..e332956972 100644 --- a/src/arch/riscv/isa.hh +++ b/src/arch/riscv/isa.hh @@ -70,6 +70,7 @@ enum FPUStatus class ISA : public BaseISA { protected: + RiscvType rv_type; std::vector miscRegFile; bool checkAlignment; @@ -80,10 
+81,10 @@ class ISA : public BaseISA void clear() override; - PCStateBase * + PCStateBase* newPCState(Addr new_inst_addr=0) const override { - return new PCState(new_inst_addr); + return new PCState(new_inst_addr, rv_type); } public: @@ -104,7 +105,7 @@ class ISA : public BaseISA virtual const std::unordered_map& getCSRMaskMap() const { - return CSRMasks; + return CSRMasks[rv_type]; } bool alignmentCheckEnabled() const { return checkAlignment; } @@ -125,6 +126,8 @@ class ISA : public BaseISA void handleLockedSnoop(PacketPtr pkt, Addr cacheBlockMask) override; void globalClearExclusive() override; + + RiscvType rvType() const { return rv_type; } }; } // namespace RiscvISA diff --git a/src/arch/riscv/isa/bitfields.isa b/src/arch/riscv/isa/bitfields.isa index 60636c68f8..41935c5b0f 100644 --- a/src/arch/riscv/isa/bitfields.isa +++ b/src/arch/riscv/isa/bitfields.isa @@ -32,6 +32,7 @@ // // Bitfield definitions. // +def bitfield RVTYPE rv_type; def bitfield QUADRANT <1:0>; def bitfield OPCODE <6:2>; diff --git a/src/arch/riscv/isa/formats/basic.isa b/src/arch/riscv/isa/formats/basic.isa index 6dfeea851c..6bae0a6f86 100644 --- a/src/arch/riscv/isa/formats/basic.isa +++ b/src/arch/riscv/isa/formats/basic.isa @@ -39,7 +39,7 @@ def template BasicDeclare {{ public: /// Constructor. - %(class_name)s(MachInst machInst); + %(class_name)s(ExtMachInst machInst); Fault execute(ExecContext *, trace::InstRecord *) const override; using %(base_class)s::generateDisassembly; }; @@ -47,7 +47,7 @@ def template BasicDeclare {{ // Basic instruction class constructor template. 
def template BasicConstructor {{ - %(class_name)s::%(class_name)s(MachInst machInst) + %(class_name)s::%(class_name)s(ExtMachInst machInst) : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) { %(set_reg_idx_arr)s; diff --git a/src/arch/riscv/isa/formats/compressed.isa b/src/arch/riscv/isa/formats/compressed.isa index 6fe899b881..d09865803e 100644 --- a/src/arch/riscv/isa/formats/compressed.isa +++ b/src/arch/riscv/isa/formats/compressed.isa @@ -125,7 +125,7 @@ def template CBasicDeclare {{ public: /// Constructor. - %(class_name)s(MachInst machInst); + %(class_name)s(ExtMachInst machInst); Fault execute(ExecContext *, trace::InstRecord *) const override; std::string generateDisassembly( Addr pc, const loader::SymbolTable *symtab) const override; diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa index 3cad5ed0c9..5390164f10 100644 --- a/src/arch/riscv/isa/formats/standard.isa +++ b/src/arch/riscv/isa/formats/standard.isa @@ -44,7 +44,7 @@ def template ImmDeclare {{ public: /// Constructor. - %(class_name)s(MachInst machInst); + %(class_name)s(ExtMachInst machInst); Fault execute(ExecContext *, trace::InstRecord *) const override; std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override; @@ -52,7 +52,7 @@ def template ImmDeclare {{ }}; def template ImmConstructor {{ - %(class_name)s::%(class_name)s(MachInst machInst) + %(class_name)s::%(class_name)s(ExtMachInst machInst) : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) { %(set_reg_idx_arr)s; @@ -168,7 +168,7 @@ def template BranchDeclare {{ public: /// Constructor. 
- %(class_name)s(MachInst machInst); + %(class_name)s(ExtMachInst machInst); Fault execute(ExecContext *, trace::InstRecord *) const override; std::string @@ -198,7 +198,9 @@ def template BranchExecute {{ %(class_name)s::branchTarget(const PCStateBase &branch_pc) const { auto &rpc = branch_pc.as(); - return std::make_unique(rpc.pc() + imm); + std::unique_ptr npc(dynamic_cast(rpc.clone())); + npc->set(rpc.pc() + imm); + return npc; } std::string @@ -226,7 +228,7 @@ def template JumpDeclare {{ public: /// Constructor. - %(class_name)s(MachInst machInst); + %(class_name)s(ExtMachInst machInst); Fault execute(ExecContext *, trace::InstRecord *) const override; std::string @@ -241,7 +243,7 @@ def template JumpDeclare {{ }}; def template JumpConstructor {{ - %(class_name)s::%(class_name)s(MachInst machInst) + %(class_name)s::%(class_name)s(ExtMachInst machInst) : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) { %(set_reg_idx_arr)s; diff --git a/src/arch/riscv/pcstate.hh b/src/arch/riscv/pcstate.hh index 0125507f96..de07145dc3 100644 --- a/src/arch/riscv/pcstate.hh +++ b/src/arch/riscv/pcstate.hh @@ -43,21 +43,29 @@ #define __ARCH_RISCV_PCSTATE_HH__ #include "arch/generic/pcstate.hh" +#include "enums/RiscvType.hh" namespace gem5 { - namespace RiscvISA { +using RiscvType = enums::RiscvType; +constexpr enums::RiscvType RV32 = enums::RV32; +constexpr enums::RiscvType RV64 = enums::RV64; + class PCState : public GenericISA::UPCState<4> { private: bool _compressed = false; - bool _rv32 = false; + RiscvType _rv_type = RV64; public: - using GenericISA::UPCState<4>::UPCState; + PCState() = default; + PCState(const PCState &other) = default; + PCState(Addr addr, RiscvType rv_type) : UPCState(addr), _rv_type(rv_type) + { + } PCStateBase *clone() const override { return new PCState(*this); } @@ -67,14 +75,14 @@ class PCState : public GenericISA::UPCState<4> Base::update(other); auto &pcstate = other.as(); _compressed = pcstate._compressed; - _rv32 = pcstate._rv32; + _rv_type 
= pcstate._rv_type; } void compressed(bool c) { _compressed = c; } bool compressed() const { return _compressed; } - void rv32(bool val) { _rv32 = val; } - bool rv32() const { return _rv32; } + void rvType(RiscvType rv_type) { _rv_type = rv_type; } + RiscvType rvType() const { return _rv_type; } bool branching() const override diff --git a/src/arch/riscv/process.cc b/src/arch/riscv/process.cc index 7c91b92217..dc7abae790 100644 --- a/src/arch/riscv/process.cc +++ b/src/arch/riscv/process.cc @@ -115,7 +115,7 @@ RiscvProcess32::initState() auto *tc = system->threads[ctx]; tc->setMiscRegNoEffect(MISCREG_PRV, PRV_U); PCState pc = tc->pcState().as(); - pc.rv32(true); + pc.rvType(RV32); tc->pcState(pc); } } diff --git a/src/arch/riscv/regs/misc.hh b/src/arch/riscv/regs/misc.hh index 5f074475c9..7f6fff4e00 100644 --- a/src/arch/riscv/regs/misc.hh +++ b/src/arch/riscv/regs/misc.hh @@ -51,10 +51,12 @@ #include "arch/generic/vec_pred_reg.hh" #include "arch/generic/vec_reg.hh" +#include "arch/riscv/types.hh" #include "base/bitunion.hh" #include "base/types.hh" #include "cpu/reg_class.hh" #include "debug/MiscRegs.hh" +#include "enums/RiscvType.hh" namespace gem5 { @@ -550,9 +552,10 @@ const std::unordered_map CSRData = { * the fields for higher privileges. 
*/ BitUnion64(STATUS) - Bitfield<63> sd; + Bitfield<63> rv64_sd; Bitfield<35, 34> sxl; Bitfield<33, 32> uxl; + Bitfield<31> rv32_sd; Bitfield<22> tsr; Bitfield<21> tw; Bitfield<20> tvm; @@ -590,20 +593,34 @@ BitUnion64(INTERRUPT) Bitfield<0> usi; EndBitUnion(INTERRUPT) -const off_t MXL_OFFSET = (sizeof(uint64_t) * 8 - 2); +const off_t MXL_OFFSETS[enums::Num_RiscvType] = { + [RV32] = (sizeof(uint32_t) * 8 - 2), + [RV64] = (sizeof(uint64_t) * 8 - 2), +}; const off_t SXL_OFFSET = 34; const off_t UXL_OFFSET = 32; const off_t FS_OFFSET = 13; const off_t FRM_OFFSET = 5; -const RegVal ISA_MXL_MASK = 3ULL << MXL_OFFSET; +const RegVal ISA_MXL_MASKS[enums::Num_RiscvType] = { + [RV32] = 3ULL << MXL_OFFSETS[RV32], + [RV64] = 3ULL << MXL_OFFSETS[RV64], +}; const RegVal ISA_EXT_MASK = mask(26); const RegVal ISA_EXT_C_MASK = 1UL << ('c' - 'a'); -const RegVal MISA_MASK = ISA_MXL_MASK | ISA_EXT_MASK; +const RegVal MISA_MASKS[enums::Num_RiscvType] = { + [RV32] = ISA_MXL_MASKS[RV32] | ISA_EXT_MASK, + [RV64] = ISA_MXL_MASKS[RV64] | ISA_EXT_MASK, +}; -const RegVal STATUS_SD_MASK = 1ULL << ((sizeof(uint64_t) * 8) - 1); + +const RegVal STATUS_SD_MASKS[enums::Num_RiscvType] = { + [RV32] = 1ULL << ((sizeof(uint32_t) * 8) - 1), + [RV64] = 1ULL << ((sizeof(uint64_t) * 8) - 1), +}; const RegVal STATUS_SXL_MASK = 3ULL << SXL_OFFSET; const RegVal STATUS_UXL_MASK = 3ULL << UXL_OFFSET; + const RegVal STATUS_TSR_MASK = 1ULL << 22; const RegVal STATUS_TW_MASK = 1ULL << 21; const RegVal STATUS_TVM_MASK = 1ULL << 20; @@ -621,26 +638,39 @@ const RegVal STATUS_UPIE_MASK = 1ULL << 4; const RegVal STATUS_MIE_MASK = 1ULL << 3; const RegVal STATUS_SIE_MASK = 1ULL << 1; const RegVal STATUS_UIE_MASK = 1ULL << 0; -const RegVal MSTATUS_MASK = STATUS_SD_MASK | STATUS_SXL_MASK | - STATUS_UXL_MASK | STATUS_TSR_MASK | - STATUS_TW_MASK | STATUS_TVM_MASK | - STATUS_MXR_MASK | STATUS_SUM_MASK | - STATUS_MPRV_MASK | STATUS_XS_MASK | - STATUS_FS_MASK | STATUS_VS_MASK | - STATUS_MPP_MASK | STATUS_SPP_MASK | - 
STATUS_MPIE_MASK | STATUS_SPIE_MASK | - STATUS_UPIE_MASK | STATUS_MIE_MASK | - STATUS_SIE_MASK | STATUS_UIE_MASK; -const RegVal SSTATUS_MASK = STATUS_SD_MASK | STATUS_UXL_MASK | - STATUS_MXR_MASK | STATUS_SUM_MASK | - STATUS_XS_MASK | STATUS_FS_MASK | - STATUS_VS_MASK | STATUS_SPP_MASK | - STATUS_SPIE_MASK | STATUS_UPIE_MASK | - STATUS_SIE_MASK | STATUS_UIE_MASK; -const RegVal USTATUS_MASK = STATUS_SD_MASK | STATUS_MXR_MASK | - STATUS_SUM_MASK | STATUS_XS_MASK | - STATUS_FS_MASK | STATUS_VS_MASK | - STATUS_UPIE_MASK | STATUS_UIE_MASK; +const RegVal MSTATUS_MASKS[enums::Num_RiscvType] = { + [RV32] = STATUS_SD_MASKS[RV32] | STATUS_TSR_MASK | STATUS_TW_MASK | + STATUS_TVM_MASK | STATUS_MXR_MASK | STATUS_SUM_MASK | + STATUS_MPRV_MASK | STATUS_XS_MASK | STATUS_FS_MASK | + STATUS_VS_MASK | STATUS_MPP_MASK | STATUS_SPP_MASK | + STATUS_MPIE_MASK | STATUS_SPIE_MASK | STATUS_UPIE_MASK | + STATUS_MIE_MASK | STATUS_SIE_MASK | STATUS_UIE_MASK, + [RV64] = STATUS_SD_MASKS[RV64] | STATUS_SXL_MASK | STATUS_UXL_MASK | + STATUS_TSR_MASK | STATUS_TW_MASK | STATUS_TVM_MASK | + STATUS_MXR_MASK | STATUS_SUM_MASK | STATUS_MPRV_MASK | + STATUS_XS_MASK | STATUS_FS_MASK | STATUS_VS_MASK| + STATUS_MPP_MASK | STATUS_SPP_MASK | STATUS_MPIE_MASK | + STATUS_SPIE_MASK | STATUS_UPIE_MASK | STATUS_MIE_MASK | + STATUS_SIE_MASK | STATUS_UIE_MASK, +}; +const RegVal SSTATUS_MASKS[enums::Num_RiscvType] = { + [RV32] = STATUS_SD_MASKS[RV32] | STATUS_MXR_MASK | STATUS_SUM_MASK | + STATUS_XS_MASK | STATUS_FS_MASK | STATUS_VS_MASK | + STATUS_SPP_MASK | STATUS_SPIE_MASK | STATUS_UPIE_MASK | + STATUS_SIE_MASK | STATUS_UIE_MASK, + [RV64] = STATUS_SD_MASKS[RV64] | STATUS_UXL_MASK | STATUS_MXR_MASK | + STATUS_SUM_MASK | STATUS_XS_MASK | STATUS_FS_MASK | + STATUS_VS_MASK | STATUS_SPP_MASK | STATUS_SPIE_MASK | + STATUS_UPIE_MASK | STATUS_SIE_MASK | STATUS_UIE_MASK, +}; +const RegVal USTATUS_MASKS[enums::Num_RiscvType] = { + [RV32] = STATUS_SD_MASKS[RV32] | STATUS_MXR_MASK | STATUS_SUM_MASK | + STATUS_XS_MASK | 
STATUS_FS_MASK | STATUS_VS_MASK | + STATUS_UPIE_MASK | STATUS_UIE_MASK, + [RV64] = STATUS_SD_MASKS[RV64] | STATUS_MXR_MASK | STATUS_SUM_MASK | + STATUS_XS_MASK | STATUS_FS_MASK | STATUS_VS_MASK | + STATUS_UPIE_MASK | STATUS_UIE_MASK, +}; const RegVal MEI_MASK = 1ULL << 11; const RegVal SEI_MASK = 1ULL << 9; @@ -661,20 +691,38 @@ const RegVal UI_MASK = UEI_MASK | UTI_MASK | USI_MASK; const RegVal FFLAGS_MASK = (1 << FRM_OFFSET) - 1; const RegVal FRM_MASK = 0x7; -const std::unordered_map CSRMasks = { - {CSR_USTATUS, USTATUS_MASK}, - {CSR_UIE, UI_MASK}, - {CSR_UIP, UI_MASK}, - {CSR_FFLAGS, FFLAGS_MASK}, - {CSR_FRM, FRM_MASK}, - {CSR_FCSR, FFLAGS_MASK | (FRM_MASK << FRM_OFFSET)}, - {CSR_SSTATUS, SSTATUS_MASK}, - {CSR_SIE, SI_MASK}, - {CSR_SIP, SI_MASK}, - {CSR_MSTATUS, MSTATUS_MASK}, - {CSR_MISA, MISA_MASK}, - {CSR_MIE, MI_MASK}, - {CSR_MIP, MI_MASK} +const RegVal CAUSE_INTERRUPT_MASKS[enums::Num_RiscvType] = { + [RV32] = (1ULL << 31), + [RV64] = (1ULL << 63), +}; + +const std::unordered_map CSRMasks[enums::Num_RiscvType] = { + [RV32] = {{CSR_USTATUS, USTATUS_MASKS[RV32]}, + {CSR_UIE, UI_MASK}, + {CSR_UIP, UI_MASK}, + {CSR_FFLAGS, FFLAGS_MASK}, + {CSR_FRM, FRM_MASK}, + {CSR_FCSR, FFLAGS_MASK | (FRM_MASK << FRM_OFFSET)}, + {CSR_SSTATUS, SSTATUS_MASKS[RV32]}, + {CSR_SIE, SI_MASK}, + {CSR_SIP, SI_MASK}, + {CSR_MSTATUS, MSTATUS_MASKS[RV32]}, + {CSR_MISA, MISA_MASKS[RV32]}, + {CSR_MIE, MI_MASK}, + {CSR_MIP, MI_MASK}}, + [RV64] = {{CSR_USTATUS, USTATUS_MASKS[RV64]}, + {CSR_UIE, UI_MASK}, + {CSR_UIP, UI_MASK}, + {CSR_FFLAGS, FFLAGS_MASK}, + {CSR_FRM, FRM_MASK}, + {CSR_FCSR, FFLAGS_MASK | (FRM_MASK << FRM_OFFSET)}, + {CSR_SSTATUS, SSTATUS_MASKS[RV64]}, + {CSR_SIE, SI_MASK}, + {CSR_SIP, SI_MASK}, + {CSR_MSTATUS, MSTATUS_MASKS[RV64]}, + {CSR_MISA, MISA_MASKS[RV64]}, + {CSR_MIE, MI_MASK}, + {CSR_MIP, MI_MASK}}, }; } // namespace RiscvISA diff --git a/src/arch/riscv/remote_gdb.cc b/src/arch/riscv/remote_gdb.cc index ed700bbf8d..4bdd88fde6 100644 --- a/src/arch/riscv/remote_gdb.cc 
+++ b/src/arch/riscv/remote_gdb.cc @@ -135,10 +135,10 @@ #include -#include "arch/riscv/gdb-xml/gdb_xml_riscv_cpu.hh" -#include "arch/riscv/gdb-xml/gdb_xml_riscv_csr.hh" -#include "arch/riscv/gdb-xml/gdb_xml_riscv_fpu.hh" -#include "arch/riscv/gdb-xml/gdb_xml_riscv_target.hh" +#include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_cpu.hh" +#include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_csr.hh" +#include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_fpu.hh" +#include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_target.hh" #include "arch/riscv/mmu.hh" #include "arch/riscv/pagetable_walker.hh" #include "arch/riscv/regs/float.hh" @@ -155,8 +155,16 @@ namespace gem5 using namespace RiscvISA; +static RiscvType +getRvType(ThreadContext* tc) +{ + auto isa = dynamic_cast(tc->getIsaPtr()); + panic_if(!isa, "Cannot derive rv_type from non-riscv isa"); + return isa->rvType(); +} + RemoteGDB::RemoteGDB(System *_system, int _port) - : BaseRemoteGDB(_system, _port), regCache(this) + : BaseRemoteGDB(_system, _port), regCache64(this) { } @@ -186,9 +194,10 @@ RemoteGDB::acc(Addr va, size_t len) } void -RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context) +RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context) { DPRINTF(GDBAcc, "getregs in remotegdb, size %lu\n", size()); + auto& RVxCSRMasks = CSRMasks[RV64]; // General registers for (int i = 0; i < int_reg::NumArchRegs; i++) { @@ -200,11 +209,11 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context) for (int i = 0; i < float_reg::NumRegs; i++) r.fpu[i] = context->getReg(floatRegClass[i]); r.fflags = context->readMiscRegNoEffect( - CSRData.at(CSR_FFLAGS).physIndex) & CSRMasks.at(CSR_FFLAGS); + CSRData.at(CSR_FFLAGS).physIndex) & RVxCSRMasks.at(CSR_FFLAGS); r.frm = context->readMiscRegNoEffect( - CSRData.at(CSR_FRM).physIndex) & CSRMasks.at(CSR_FRM); + CSRData.at(CSR_FRM).physIndex) & RVxCSRMasks.at(CSR_FRM); r.fcsr = context->readMiscRegNoEffect( - CSRData.at(CSR_FCSR).physIndex) & CSRMasks.at(CSR_FCSR); + 
CSRData.at(CSR_FCSR).physIndex) & RVxCSRMasks.at(CSR_FCSR); // CSR registers r.cycle = context->readMiscRegNoEffect( @@ -214,9 +223,9 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context) // U mode CSR r.ustatus = context->readMiscRegNoEffect( - CSRData.at(CSR_USTATUS).physIndex) & CSRMasks.at(CSR_USTATUS); + CSRData.at(CSR_USTATUS).physIndex) & RVxCSRMasks.at(CSR_USTATUS); r.uie = context->readMiscReg( - CSRData.at(CSR_UIE).physIndex) & CSRMasks.at(CSR_UIE); + CSRData.at(CSR_UIE).physIndex) & RVxCSRMasks.at(CSR_UIE); r.utvec = context->readMiscRegNoEffect( CSRData.at(CSR_UTVEC).physIndex); r.uscratch = context->readMiscRegNoEffect( @@ -228,17 +237,17 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context) r.utval = context->readMiscRegNoEffect( CSRData.at(CSR_UTVAL).physIndex); r.uip = context->readMiscReg( - CSRData.at(CSR_UIP).physIndex) & CSRMasks.at(CSR_UIP); + CSRData.at(CSR_UIP).physIndex) & RVxCSRMasks.at(CSR_UIP); // S mode CSR r.sstatus = context->readMiscRegNoEffect( - CSRData.at(CSR_SSTATUS).physIndex) & CSRMasks.at(CSR_SSTATUS); + CSRData.at(CSR_SSTATUS).physIndex) & RVxCSRMasks.at(CSR_SSTATUS); r.sedeleg = context->readMiscRegNoEffect( CSRData.at(CSR_SEDELEG).physIndex); r.sideleg = context->readMiscRegNoEffect( CSRData.at(CSR_SIDELEG).physIndex); r.sie = context->readMiscReg( - CSRData.at(CSR_SIE).physIndex) & CSRMasks.at(CSR_SIE); + CSRData.at(CSR_SIE).physIndex) & RVxCSRMasks.at(CSR_SIE); r.stvec = context->readMiscRegNoEffect( CSRData.at(CSR_STVEC).physIndex); r.scounteren = context->readMiscRegNoEffect( @@ -252,7 +261,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context) r.stval = context->readMiscRegNoEffect( CSRData.at(CSR_STVAL).physIndex); r.sip = context->readMiscReg( - CSRData.at(CSR_SIP).physIndex) & CSRMasks.at(CSR_SIP); + CSRData.at(CSR_SIP).physIndex) & RVxCSRMasks.at(CSR_SIP); r.satp = context->readMiscRegNoEffect( CSRData.at(CSR_SATP).physIndex); @@ -266,15 +275,15 @@ 
RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context) r.mhartid = context->readMiscRegNoEffect( CSRData.at(CSR_MHARTID).physIndex); r.mstatus = context->readMiscRegNoEffect( - CSRData.at(CSR_MSTATUS).physIndex) & CSRMasks.at(CSR_MSTATUS); + CSRData.at(CSR_MSTATUS).physIndex) & RVxCSRMasks.at(CSR_MSTATUS); r.misa = context->readMiscRegNoEffect( - CSRData.at(CSR_MISA).physIndex) & CSRMasks.at(CSR_MISA); + CSRData.at(CSR_MISA).physIndex) & RVxCSRMasks.at(CSR_MISA); r.medeleg = context->readMiscRegNoEffect( CSRData.at(CSR_MEDELEG).physIndex); r.mideleg = context->readMiscRegNoEffect( CSRData.at(CSR_MIDELEG).physIndex); r.mie = context->readMiscReg( - CSRData.at(CSR_MIE).physIndex) & CSRMasks.at(CSR_MIE); + CSRData.at(CSR_MIE).physIndex) & RVxCSRMasks.at(CSR_MIE); r.mtvec = context->readMiscRegNoEffect( CSRData.at(CSR_MTVEC).physIndex); r.mcounteren = context->readMiscRegNoEffect( @@ -288,13 +297,13 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context) r.mtval = context->readMiscRegNoEffect( CSRData.at(CSR_MTVAL).physIndex); r.mip = context->readMiscReg( - CSRData.at(CSR_MIP).physIndex) & CSRMasks.at(CSR_MIP); + CSRData.at(CSR_MIP).physIndex) & RVxCSRMasks.at(CSR_MIP); // H mode CSR (to be implemented) } void -RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const +RemoteGDB::Riscv64GdbRegCache::setRegs(ThreadContext *context) const { // NOTE: no error will be reported for attempting to set masked bits. 
RegVal oldVal; @@ -310,23 +319,25 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const for (int i = 0; i < float_reg::NumRegs; i++) context->setReg(floatRegClass[i], r.fpu[i]); + auto& RVxCSRMasks = CSRMasks[RV64]; + oldVal = context->readMiscRegNoEffect( CSRData.at(CSR_FFLAGS).physIndex); - mask = CSRMasks.at(CSR_FFLAGS); + mask = RVxCSRMasks.at(CSR_FFLAGS); newVal = (oldVal & ~mask) | (r.fflags & mask); context->setMiscRegNoEffect( CSRData.at(CSR_FFLAGS).physIndex, newVal); oldVal = context->readMiscRegNoEffect( CSRData.at(CSR_FRM).physIndex); - mask = CSRMasks.at(CSR_FRM); + mask = RVxCSRMasks.at(CSR_FRM); newVal = (oldVal & ~mask) | (r.frm & mask); context->setMiscRegNoEffect( CSRData.at(CSR_FRM).physIndex, newVal); oldVal = context->readMiscRegNoEffect( CSRData.at(CSR_FCSR).physIndex); - mask = CSRMasks.at(CSR_FCSR); + mask = RVxCSRMasks.at(CSR_FCSR); newVal = (oldVal & ~mask) | (r.fcsr & mask); context->setMiscRegNoEffect( CSRData.at(CSR_FCSR).physIndex, newVal); @@ -340,13 +351,13 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const // U mode CSR oldVal = context->readMiscRegNoEffect( CSRData.at(CSR_USTATUS).physIndex); - mask = CSRMasks.at(CSR_USTATUS); + mask = RVxCSRMasks.at(CSR_USTATUS); newVal = (oldVal & ~mask) | (r.ustatus & mask); context->setMiscRegNoEffect( CSRData.at(CSR_USTATUS).physIndex, newVal); oldVal = context->readMiscReg( CSRData.at(CSR_UIE).physIndex); - mask = CSRMasks.at(CSR_UIE); + mask = RVxCSRMasks.at(CSR_UIE); newVal = (oldVal & ~mask) | (r.uie & mask); context->setMiscReg( CSRData.at(CSR_UIE).physIndex, newVal); @@ -362,7 +373,7 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_UTVAL).physIndex, r.utval); oldVal = context->readMiscReg( CSRData.at(CSR_UIP).physIndex); - mask = CSRMasks.at(CSR_UIP); + mask = RVxCSRMasks.at(CSR_UIP); newVal = (oldVal & ~mask) | (r.uip & mask); context->setMiscReg( CSRData.at(CSR_UIP).physIndex, newVal); @@ -370,7 +381,7 @@ 
RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const // S mode CSR oldVal = context->readMiscRegNoEffect( CSRData.at(CSR_SSTATUS).physIndex); - mask = CSRMasks.at(CSR_SSTATUS); + mask = RVxCSRMasks.at(CSR_SSTATUS); newVal = (oldVal & ~mask) | (r.sstatus & mask); context->setMiscRegNoEffect( CSRData.at(CSR_SSTATUS).physIndex, newVal); @@ -380,7 +391,7 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_SIDELEG).physIndex, r.sideleg); oldVal = context->readMiscReg( CSRData.at(CSR_SIE).physIndex); - mask = CSRMasks.at(CSR_SIE); + mask = RVxCSRMasks.at(CSR_SIE); newVal = (oldVal & ~mask) | (r.sie & mask); context->setMiscReg( CSRData.at(CSR_SIE).physIndex, newVal); @@ -398,7 +409,7 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_STVAL).physIndex, r.stval); oldVal = context->readMiscReg( CSRData.at(CSR_SIP).physIndex); - mask = CSRMasks.at(CSR_SIP); + mask = RVxCSRMasks.at(CSR_SIP); newVal = (oldVal & ~mask) | (r.sip & mask); context->setMiscReg( CSRData.at(CSR_SIP).physIndex, newVal); @@ -416,13 +427,13 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_MHARTID).physIndex, r.mhartid); oldVal = context->readMiscRegNoEffect( CSRData.at(CSR_MSTATUS).physIndex); - mask = CSRMasks.at(CSR_MSTATUS); + mask = RVxCSRMasks.at(CSR_MSTATUS); newVal = (oldVal & ~mask) | (r.mstatus & mask); context->setMiscRegNoEffect( CSRData.at(CSR_MSTATUS).physIndex, newVal); oldVal = context->readMiscRegNoEffect( CSRData.at(CSR_MISA).physIndex); - mask = CSRMasks.at(CSR_MISA); + mask = RVxCSRMasks.at(CSR_MISA); newVal = (oldVal & ~mask) | (r.misa & mask); context->setMiscRegNoEffect( CSRData.at(CSR_MISA).physIndex, newVal); @@ -432,7 +443,7 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_MIDELEG).physIndex, r.mideleg); oldVal = context->readMiscReg( CSRData.at(CSR_MIE).physIndex); - mask = CSRMasks.at(CSR_MIE); + mask = 
RVxCSRMasks.at(CSR_MIE); newVal = (oldVal & ~mask) | (r.mie & mask); context->setMiscReg( CSRData.at(CSR_MIE).physIndex, newVal); @@ -450,7 +461,7 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_MTVAL).physIndex, r.mtval); oldVal = context->readMiscReg( CSRData.at(CSR_MIP).physIndex); - mask = CSRMasks.at(CSR_MIP); + mask = RVxCSRMasks.at(CSR_MIP); newVal = (oldVal & ~mask) | (r.mip & mask); context->setMiscReg( CSRData.at(CSR_MIP).physIndex, newVal); @@ -473,11 +484,13 @@ RemoteGDB::getXferFeaturesRead(const std::string &annex, std::string &output) Blobs::s##_len) \ } static const std::map annexMap{ - GDB_XML("target.xml", gdb_xml_riscv_target), - GDB_XML("riscv-64bit-cpu.xml", gdb_xml_riscv_cpu), - GDB_XML("riscv-64bit-fpu.xml", gdb_xml_riscv_fpu), - GDB_XML("riscv-64bit-csr.xml", gdb_xml_riscv_csr)}; + GDB_XML("riscv-64bit.xml", gdb_xml_riscv_64bit_target), + GDB_XML("riscv-64bit-cpu.xml", gdb_xml_riscv_64bit_cpu), + GDB_XML("riscv-64bit-fpu.xml", gdb_xml_riscv_64bit_fpu), + GDB_XML("riscv-64bit-csr.xml", gdb_xml_riscv_64bit_csr)}; #undef GDB_XML + if (getRvType(context()) == RV32) + return false; auto it = annexMap.find(annex); if (it == annexMap.end()) return false; @@ -488,7 +501,7 @@ RemoteGDB::getXferFeaturesRead(const std::string &annex, std::string &output) BaseGdbRegCache * RemoteGDB::gdbRegs() { - return ®Cache; + return ®Cache64; } } // namespace gem5 diff --git a/src/arch/riscv/remote_gdb.hh b/src/arch/riscv/remote_gdb.hh index f87481ece0..a8262a6a2a 100644 --- a/src/arch/riscv/remote_gdb.hh +++ b/src/arch/riscv/remote_gdb.hh @@ -58,7 +58,7 @@ class RemoteGDB : public BaseRemoteGDB // A breakpoint will be 2 bytes if it is compressed and 4 if not bool checkBpKind(size_t kind) override { return kind == 2 || kind == 4; } - class RiscvGdbRegCache : public BaseGdbRegCache + class Riscv64GdbRegCache : public BaseGdbRegCache { using BaseGdbRegCache::BaseGdbRegCache; private: @@ -142,7 +142,7 @@ class RemoteGDB : public 
BaseRemoteGDB } }; - RiscvGdbRegCache regCache; + Riscv64GdbRegCache regCache64; public: RemoteGDB(System *_system, int _port); diff --git a/src/arch/riscv/types.hh b/src/arch/riscv/types.hh index f06fe3eaa4..4aae1a027b 100644 --- a/src/arch/riscv/types.hh +++ b/src/arch/riscv/types.hh @@ -43,6 +43,7 @@ #define __ARCH_RISCV_TYPES_HH__ #include "arch/riscv/pcstate.hh" +#include "base/bitunion.hh" namespace gem5 { @@ -51,7 +52,13 @@ namespace RiscvISA { typedef uint32_t MachInst; -typedef uint64_t ExtMachInst; + +// This should be further extend someday when we start to support 64b+ inst. +// For now, we should be safe using the msbs to store extra information. +BitUnion64(ExtMachInst) + // Decoder state + Bitfield<63, 62> rv_type; +EndBitUnion(ExtMachInst) } // namespace RiscvISA } // namespace gem5 From 0df37a33f602fdce8a2697655c318a702f79ba28 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Wed, 23 Nov 2022 08:35:50 +0000 Subject: [PATCH 036/492] arch-arm: Setup TC/ISA at construction time 2nd attempt This partly reverts commit ec75787aef56665e893d70293bf3a0f93c33bb6a by fixing the original problem noted by Bobby (long regressions): setupThreadContext has to be implemented otherwise the GICv3 cpu interface will end up holding old references when switching TC/ISAs. This new implementation is still setting up the cpu interface reference in the ISA only when it is required, but it is storing the TC/ISA reference within the interface every time the ISA::setupThreadContext gets called. 
Change-Id: I2f54f95761d63655162c253e887b872f3718c764 Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65931 Reviewed-by: Jason Lowe-Power Tested-by: kokoro Maintainer: Bobby Bruce --- src/arch/arm/isa.cc | 33 ++++++++++++++++++++--------- src/arch/arm/isa.hh | 1 + src/dev/arm/gic_v3.cc | 2 +- src/dev/arm/gic_v3_cpu_interface.cc | 8 +++++-- src/dev/arm/gic_v3_cpu_interface.hh | 6 +++--- 5 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index a30fd94596..543e0eba7b 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -524,15 +524,10 @@ ISA::setupThreadContext() selfDebug->init(tc); - Gicv3 *gicv3 = dynamic_cast(system->getGIC()); - if (!gicv3) - return; - - if (!gicv3CpuInterface) - gicv3CpuInterface.reset(gicv3->getCPUInterface(tc->contextId())); - - gicv3CpuInterface->setISA(this); - gicv3CpuInterface->setThreadContext(tc); + if (auto gicv3_ifc = getGICv3CPUInterface(tc); gicv3_ifc) { + gicv3_ifc->setISA(this); + gicv3_ifc->setThreadContext(tc); + } } void @@ -2008,10 +2003,28 @@ ISA::getGenericTimer() BaseISADevice & ISA::getGICv3CPUInterface() { - panic_if(!gicv3CpuInterface, "GICV3 cpu interface is not registered!"); + if (gicv3CpuInterface) + return *gicv3CpuInterface.get(); + + auto gicv3_ifc = getGICv3CPUInterface(tc); + panic_if(!gicv3_ifc, "The system does not have a GICv3 irq controller\n"); + gicv3CpuInterface.reset(gicv3_ifc); + return *gicv3CpuInterface.get(); } +BaseISADevice* +ISA::getGICv3CPUInterface(ThreadContext *tc) +{ + assert(system); + Gicv3 *gicv3 = dynamic_cast(system->getGIC()); + if (gicv3) { + return gicv3->getCPUInterface(tc->contextId()); + } else { + return nullptr; + } +} + bool ISA::inSecureState() const { diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index 1f7a7561a7..9e1afa714b 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -116,6 +116,7 @@ namespace ArmISA BaseISADevice &getGenericTimer(); 
BaseISADevice &getGICv3CPUInterface(); + BaseISADevice *getGICv3CPUInterface(ThreadContext *tc); RegVal miscRegs[NUM_MISCREGS]; const RegId *intRegMap; diff --git a/src/dev/arm/gic_v3.cc b/src/dev/arm/gic_v3.cc index dde3818b07..e14d1f2bef 100644 --- a/src/dev/arm/gic_v3.cc +++ b/src/dev/arm/gic_v3.cc @@ -147,7 +147,7 @@ Gicv3::init() for (int i = 0; i < threads; i++) { redistributors[i] = new Gicv3Redistributor(this, i); - cpuInterfaces[i] = new Gicv3CPUInterface(this, i); + cpuInterfaces[i] = new Gicv3CPUInterface(this, sys->threads[i]); } distRange = RangeSize(params().dist_addr, diff --git a/src/dev/arm/gic_v3_cpu_interface.cc b/src/dev/arm/gic_v3_cpu_interface.cc index 0e1dbaa04b..28a173943d 100644 --- a/src/dev/arm/gic_v3_cpu_interface.cc +++ b/src/dev/arm/gic_v3_cpu_interface.cc @@ -55,15 +55,19 @@ using namespace ArmISA; const uint8_t Gicv3CPUInterface::GIC_MIN_BPR; const uint8_t Gicv3CPUInterface::GIC_MIN_BPR_NS; -Gicv3CPUInterface::Gicv3CPUInterface(Gicv3 * gic, uint32_t cpu_id) +Gicv3CPUInterface::Gicv3CPUInterface(Gicv3 * gic, ThreadContext *_tc) : BaseISADevice(), gic(gic), redistributor(nullptr), distributor(nullptr), - cpuId(cpu_id) + tc(_tc), + maintenanceInterrupt(gic->params().maint_int->get(tc)), + cpuId(tc->contextId()) { hppi.prio = 0xff; hppi.intid = Gicv3::INTID_SPURIOUS; + + setISA(static_cast(tc->getIsaPtr())); } void diff --git a/src/dev/arm/gic_v3_cpu_interface.hh b/src/dev/arm/gic_v3_cpu_interface.hh index e860373fb5..ff476bc3c6 100644 --- a/src/dev/arm/gic_v3_cpu_interface.hh +++ b/src/dev/arm/gic_v3_cpu_interface.hh @@ -68,10 +68,10 @@ class Gicv3CPUInterface : public ArmISA::BaseISADevice, public Serializable Gicv3 * gic; Gicv3Redistributor * redistributor; Gicv3Distributor * distributor; - uint32_t cpuId; - ArmInterruptPin *maintenanceInterrupt; ThreadContext *tc; + ArmInterruptPin *maintenanceInterrupt; + uint32_t cpuId; BitUnion64(ICC_CTLR_EL1) Bitfield<63, 20> res0_3; @@ -359,7 +359,7 @@ class Gicv3CPUInterface : public 
ArmISA::BaseISADevice, public Serializable void setBankedMiscReg(ArmISA::MiscRegIndex misc_reg, RegVal val) const; public: - Gicv3CPUInterface(Gicv3 * gic, uint32_t cpu_id); + Gicv3CPUInterface(Gicv3 * gic, ThreadContext *tc); void init(); From ed6cf2ecedb277a46e205d00613578918ea37e12 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Tue, 1 Nov 2022 17:43:35 +0000 Subject: [PATCH 037/492] dev-arm: Allow GICv3 to be externally(publicly) updated Change-Id: Ifa7b745ea11e74c17024c22ae993b6103eecb744 Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66271 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/dev/arm/gic_v3.cc | 6 ++++++ src/dev/arm/gic_v3.hh | 2 ++ src/dev/arm/gic_v3_distributor.hh | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/dev/arm/gic_v3.cc b/src/dev/arm/gic_v3.cc index e14d1f2bef..cb462f7340 100644 --- a/src/dev/arm/gic_v3.cc +++ b/src/dev/arm/gic_v3.cc @@ -286,6 +286,12 @@ Gicv3::postInt(uint32_t cpu, ArmISA::InterruptTypes int_type) ArmSystem::callClearStandByWfi(tc); } +void +Gicv3::update() +{ + distributor->update(); +} + bool Gicv3::supportsVersion(GicVersion version) { diff --git a/src/dev/arm/gic_v3.hh b/src/dev/arm/gic_v3.hh index 120b0390a6..2ea6a98b3b 100644 --- a/src/dev/arm/gic_v3.hh +++ b/src/dev/arm/gic_v3.hh @@ -206,6 +206,8 @@ class Gicv3 : public BaseGic, public Gicv3Registers void postInt(uint32_t cpu, ArmISA::InterruptTypes int_type); + void update(); + protected: // GIC state transfer void copyGicState(Gicv3Registers* from, Gicv3Registers* to); diff --git a/src/dev/arm/gic_v3_distributor.hh b/src/dev/arm/gic_v3_distributor.hh index 9960e91593..f80800fb0a 100644 --- a/src/dev/arm/gic_v3_distributor.hh +++ b/src/dev/arm/gic_v3_distributor.hh @@ -257,7 +257,6 @@ class Gicv3Distributor : public Serializable void serialize(CheckpointOut & cp) const override; void unserialize(CheckpointIn & cp) override; - void update(); 
Gicv3CPUInterface* route(uint32_t int_id); public: @@ -274,6 +273,7 @@ class Gicv3Distributor : public Serializable bool is_secure_access); void copy(Gicv3Registers *from, Gicv3Registers *to); + void update(); }; } // namespace gem5 From 596da56b6108e648875e1c22b23ff0153c0bcaf4 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Thu, 1 Dec 2022 11:37:14 +0000 Subject: [PATCH 038/492] arch-arm: Remove deprecated Armv7 debug Vector Catch This was part of Armv7 self hosted debug and has been officially deprecated in Armv8 Change-Id: I6ad240ac7dfc389f7de32d4b5b44d9da238c6e46 Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66251 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro Maintainer: Andreas Sandberg Reviewed-by: Andreas Sandberg --- src/arch/arm/faults.cc | 17 ----- src/arch/arm/self_debug.cc | 126 +------------------------------------ src/arch/arm/self_debug.hh | 47 +------------- 3 files changed, 2 insertions(+), 188 deletions(-) diff --git a/src/arch/arm/faults.cc b/src/arch/arm/faults.cc index b4ef199201..379e761f98 100644 --- a/src/arch/arm/faults.cc +++ b/src/arch/arm/faults.cc @@ -503,9 +503,6 @@ ArmFault::invoke(ThreadContext *tc, const StaticInstPtr &inst) void ArmFault::invoke32(ThreadContext *tc, const StaticInstPtr &inst) { - if (vectorCatch(tc, inst)) - return; - // ARMv7 (ARM ARM issue C B1.9) bool have_security = ArmSystem::haveEL(tc, EL3); @@ -729,20 +726,6 @@ ArmFault::invoke64(ThreadContext *tc, const StaticInstPtr &inst) setSyndrome(tc, getSyndromeReg64()); } -bool -ArmFault::vectorCatch(ThreadContext *tc, const StaticInstPtr &inst) -{ - SelfDebug *sd = ArmISA::ISA::getSelfDebug(tc); - VectorCatch* vc = sd->getVectorCatch(tc); - if (vc && !vc->isVCMatch()) { - Fault fault = sd->testVectorCatch(tc, 0x0, this); - if (fault != NoFault) - fault->invoke(tc, inst); - return true; - } - return false; -} - ArmStaticInst * ArmFault::instrAnnotate(const StaticInstPtr 
&inst) { diff --git a/src/arch/arm/self_debug.cc b/src/arch/arm/self_debug.cc index 27064cd7be..a4e685fce8 100644 --- a/src/arch/arm/self_debug.cc +++ b/src/arch/arm/self_debug.cc @@ -56,9 +56,7 @@ SelfDebug::testDebug(ThreadContext *tc, const RequestPtr &req, if (mode == BaseMMU::Execute) { const bool d_step = softStep->advanceSS(tc); if (!d_step) { - fault = testVectorCatch(tc, req->getVaddr(), nullptr); - if (fault == NoFault) - fault = testBreakPoints(tc, req->getVaddr()); + fault = testBreakPoints(tc, req->getVaddr()); } } else if (!req->isCacheMaintenance() || (req->isCacheInvalidate() && !req->isCacheClean())) { @@ -368,10 +366,6 @@ SelfDebug::init(ThreadContext *tc) const HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); const HDCR mdcr = tc->readMiscRegNoEffect(MISCREG_MDCR_EL2); setenableTDETGE(hcr, mdcr); - - // Enable Vector Catch Exceptions - const DEVID dvid = tc->readMiscReg(MISCREG_DBGDEVID0); - vcExcpt = new VectorCatch(dvid.vectorcatch==0x0, this); } bool @@ -706,122 +700,4 @@ SoftwareStep::advanceSS(ThreadContext * tc) return res; } -Fault -SelfDebug::testVectorCatch(ThreadContext *tc, Addr addr, - ArmFault *fault) -{ - - setAArch32(tc); - to32 = targetAArch32(tc); - if (!isDebugEnabled(tc) || !mde || !aarch32) - return NoFault; - - ExceptionLevel el = (ExceptionLevel) currEL(tc); - bool do_debug; - if (fault == nullptr) - do_debug = vcExcpt->addressMatching(tc, addr, el); - else - do_debug = vcExcpt->exceptionTrapping(tc, el, fault); - if (do_debug) { - if (enableTdeTge) { - return std::make_shared(0, 0x22, - ExceptionClass::PREFETCH_ABORT_TO_HYP); - } else { - return std::make_shared(addr, - ArmFault::DebugEvent, false, - ArmFault::UnknownTran, - ArmFault::VECTORCATCH); - } - } - - return NoFault; -} - -bool -VectorCatch::addressMatching(ThreadContext *tc, Addr addr, ExceptionLevel el) -{ - // Each bit position in this string corresponds to a bit in DBGVCR - // and an exception vector. 
- bool enabled; - if (conf->isAArch32() && ELIs32(tc, EL1) && - (addr & 0x3) == 0 && el != EL2 ) { - - DBGVCR match_word = 0x0; - - Addr vbase = getVectorBase(tc, false); - Addr vaddress = addr & ~ 0x1f; - Addr low_addr = bits(addr, 5, 2); - if (vaddress == vbase) { - if (ArmSystem::haveEL(tc, EL3) && !isSecure(tc)) { - uint32_t bmask = 1UL << (low_addr + 24); - match_word = match_word | (DBGVCR) bmask; - // Non-secure vectors - } else { - uint32_t bmask = 1UL << (low_addr); - match_word = match_word | (DBGVCR) bmask; - // Secure vectors (or no EL3) - } - } - uint32_t mvbase = getVectorBase(tc, true); - if (ArmSystem::haveEL(tc, EL3) && ELIs32(tc, EL3) && - isSecure(tc) && (vaddress == mvbase)) { - uint32_t bmask = 1UL << (low_addr + 8); - match_word = match_word | (DBGVCR) bmask; - // Monitor vectors - } - - DBGVCR mask; - - // Mask out bits not corresponding to vectors. - if (!ArmSystem::haveEL(tc, EL3)) { - mask = (DBGVCR) 0xDE; - } else if (!ELIs32(tc, EL3)) { - mask = (DBGVCR) 0xDE0000DE; - } else { - mask = (DBGVCR) 0xDE00DEDE; - } - DBGVCR dbgvcr = tc->readMiscReg(MISCREG_DBGVCR); - match_word = match_word & dbgvcr & mask; - enabled = match_word != 0x0; - // Check for UNPREDICTABLE case - match on Prefetch Abort and - // Data Abort vectors - ExceptionLevel ELd = debugTargetFrom(tc, isSecure(tc)); - if (((match_word & 0x18001818) != 0x0) && ELd == el) { - enabled = false; - } - } else { - enabled = false; - } - return enabled; -} - -bool -VectorCatch::exceptionTrapping(ThreadContext *tc, ExceptionLevel el, - ArmFault* fault) -{ - if (conf->isAArch32() && ELIs32(tc, EL1) && el != EL2) { - - DBGVCR dbgvcr = tc->readMiscReg(MISCREG_DBGVCR); - DBGVCR match_type = fault->vectorCatchFlag(); - DBGVCR mask; - - if (!ArmSystem::haveEL(tc, EL3)) { - mask = (DBGVCR) 0xDE; - } else if (ELIs32(tc, EL3) && fault->getToMode() == MODE_MON) { - mask = (DBGVCR) 0x0000DE00; - } else { - if (isSecure(tc)) - mask = (DBGVCR) 0x000000DE; - else - mask = (DBGVCR) 0xDE000000; - } - 
match_type = match_type & mask & dbgvcr; - - if (match_type != 0x0) { - return true; - } - } - return false; -} - } // namespace gem5 diff --git a/src/arch/arm/self_debug.hh b/src/arch/arm/self_debug.hh index 5ad0d0991f..069df24066 100644 --- a/src/arch/arm/self_debug.hh +++ b/src/arch/arm/self_debug.hh @@ -239,48 +239,12 @@ class SoftwareStep } }; -class VectorCatch -{ - private: - bool vcmatch; - SelfDebug *conf; - std::vector vectorTypes(); - - public: - VectorCatch(bool _vcmatch, SelfDebug* s) : vcmatch(_vcmatch), conf(s) - {} - - bool addressMatching(ThreadContext *tc, Addr addr, ExceptionLevel el); - bool exceptionTrapping(ThreadContext *tc, ExceptionLevel el, - ArmFault* fault); - - bool isVCMatch() const { return vcmatch; } - - private: - Addr - getVectorBase(ThreadContext *tc, bool monitor) - { - if (monitor) { - return tc->readMiscReg(MISCREG_MVBAR) & ~0x1F; - } - SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR_EL1); - if (sctlr.v) { - return (Addr) 0xFFFF0000; - } else { - Addr vbar = tc->readMiscReg(MISCREG_VBAR) & ~0x1F; - return vbar; - } - } - -}; - class SelfDebug { private: std::vector arBrkPoints; std::vector arWatchPoints; SoftwareStep * softStep; - VectorCatch * vcExcpt; bool enableTdeTge; // MDCR_EL2.TDE || HCR_EL2.TGE @@ -294,7 +258,7 @@ class SelfDebug public: SelfDebug() - : softStep(nullptr), vcExcpt(nullptr), enableTdeTge(false), + : softStep(nullptr), enableTdeTge(false), mde(false), sdd(false), kde(false), oslk(false) { softStep = new SoftwareStep(this); @@ -303,7 +267,6 @@ class SelfDebug ~SelfDebug() { delete softStep; - delete vcExcpt; } Fault testDebug(ThreadContext *tc, const RequestPtr &req, @@ -318,8 +281,6 @@ class SelfDebug Fault triggerWatchpointException(ThreadContext *tc, Addr vaddr, bool write, bool cm); public: - Fault testVectorCatch(ThreadContext *tc, Addr addr, ArmFault* flt); - bool enabled() const { return mde || softStep->bSS; }; inline BrkPoint* @@ -445,12 +406,6 @@ class SelfDebug return softStep; } - VectorCatch* - 
getVectorCatch(ThreadContext *tc) - { - return vcExcpt; - } - bool targetAArch32(ThreadContext *tc) { From 4fc690f6b7f98c8bc846b970a024879cd91fec79 Mon Sep 17 00:00:00 2001 From: Jarvis Jia Date: Wed, 23 Nov 2022 15:11:16 -0600 Subject: [PATCH 039/492] mem-cache: Fix FIFO replacement Change FIFO from using curTicks() to using timeTicks counter to avoid issues where multiple lines are considered to have entered the cache at the same tick. Change-Id: I5e0b894eb9bec4f0f8bc8f48ec2766a0fc5079c6 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65952 Reviewed-by: Daniel Carvalho Reviewed-by: Matt Sinclair Maintainer: Daniel Carvalho Tested-by: kokoro Maintainer: Matt Sinclair --- src/mem/cache/replacement_policies/fifo_rp.cc | 6 ++---- src/mem/cache/replacement_policies/fifo_rp.hh | 8 +++++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mem/cache/replacement_policies/fifo_rp.cc b/src/mem/cache/replacement_policies/fifo_rp.cc index 9655c96fa7..bc0680bc8a 100644 --- a/src/mem/cache/replacement_policies/fifo_rp.cc +++ b/src/mem/cache/replacement_policies/fifo_rp.cc @@ -36,11 +36,9 @@ namespace gem5 { - GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { - FIFO::FIFO(const Params &p) : Base(p) { @@ -51,7 +49,7 @@ FIFO::invalidate(const std::shared_ptr& replacement_data) { // Reset insertion tick std::static_pointer_cast( - replacement_data)->tickInserted = Tick(0); + replacement_data)->tickInserted = ++timeTicks; } void @@ -65,7 +63,7 @@ FIFO::reset(const std::shared_ptr& replacement_data) const { // Set insertion tick std::static_pointer_cast( - replacement_data)->tickInserted = curTick(); + replacement_data)->tickInserted = ++timeTicks; } ReplaceableEntry* diff --git a/src/mem/cache/replacement_policies/fifo_rp.hh b/src/mem/cache/replacement_policies/fifo_rp.hh index f4703d1299..4b62fd220a 100644 --- a/src/mem/cache/replacement_policies/fifo_rp.hh +++ 
b/src/mem/cache/replacement_policies/fifo_rp.hh @@ -56,13 +56,19 @@ class FIFO : public Base { /** Tick on which the entry was inserted. */ Tick tickInserted; - /** * Default constructor. Invalidate data. */ FIFOReplData() : tickInserted(0) {} }; + private: + /** + * A counter that tracks the number of + * ticks since being created to avoid a tie + */ + mutable Tick timeTicks; + public: typedef FIFORPParams Params; FIFO(const Params &p); From b9c0851120b780523c77a9210db3e5d85e9e0fb9 Mon Sep 17 00:00:00 2001 From: Yu-hsin Wang Date: Wed, 23 Nov 2022 17:13:49 +0800 Subject: [PATCH 040/492] systemc: fix the payload and packet association in Gem5ToTlm bridge If a request is initiated by systemc, passed through TlmToGem5 bridge and Gem5ToTlm bridge, it wouldn't have the systemc extension about the association. This feature is also used in TlmToGem5 bridge to detect if the packet is allocated in the current instance in async interface. In that case, we would lose the association in the Gem5ToTlm bridge async interface. For not making wide change, we need an extra way to support the association in Gem5ToTlm bridge async interface. This change adds another map to record the association and clears when the TLM transaction is completed. 
Change-Id: I486441e813236ea2cabd1bd6cbb085b08d75ec8f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66054 Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- src/systemc/tlm_bridge/gem5_to_tlm.cc | 12 ++++++++---- src/systemc/tlm_bridge/gem5_to_tlm.hh | 7 +++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/systemc/tlm_bridge/gem5_to_tlm.cc b/src/systemc/tlm_bridge/gem5_to_tlm.cc index a5eb9df27e..515975224e 100644 --- a/src/systemc/tlm_bridge/gem5_to_tlm.cc +++ b/src/systemc/tlm_bridge/gem5_to_tlm.cc @@ -231,10 +231,10 @@ Gem5ToTlmBridge::pec( } } if (phase == tlm::BEGIN_RESP) { - auto &extension = Gem5SystemC::Gem5Extension::getExtension(trans); - auto packet = extension.getPacket(); + PacketPtr packet = packetMap[&trans]; sc_assert(!blockingResponse); + sc_assert(packet); bool need_retry = false; @@ -258,6 +258,7 @@ Gem5ToTlmBridge::pec( sc_core::sc_time delay = sc_core::SC_ZERO_TIME; socket->nb_transport_fw(trans, fw_phase, delay); // Release the transaction with all the extensions. + packetMap.erase(&trans); trans.release(); } } @@ -433,11 +434,13 @@ Gem5ToTlmBridge::recvTimingReq(PacketPtr packet) sc_assert(phase == tlm::BEGIN_REQ); // Accepted but is now blocking until END_REQ (exclusion rule). blockingRequest = trans; + packetMap.emplace(trans, packet); } else if (status == tlm::TLM_UPDATED) { // The Timing annotation must be honored: sc_assert(phase == tlm::END_REQ || phase == tlm::BEGIN_RESP); // Accepted but is now blocking until END_REQ (exclusion rule). 
blockingRequest = trans; + packetMap.emplace(trans, packet); auto cb = [this, trans, phase]() { pec(*trans, phase); }; auto event = new EventFunctionWrapper( cb, "pec", true, getPriorityOfTlmPhase(phase)); @@ -477,8 +480,8 @@ Gem5ToTlmBridge::recvRespRetry() tlm::tlm_generic_payload *trans = blockingResponse; blockingResponse = nullptr; - PacketPtr packet = - Gem5SystemC::Gem5Extension::getExtension(trans).getPacket(); + PacketPtr packet = packetMap[blockingResponse]; + sc_assert(packet); bool need_retry = !bridgeResponsePort.sendTimingResp(packet); @@ -488,6 +491,7 @@ Gem5ToTlmBridge::recvRespRetry() tlm::tlm_phase phase = tlm::END_RESP; socket->nb_transport_fw(*trans, phase, delay); // Release transaction with all the extensions + packetMap.erase(trans); trans->release(); } diff --git a/src/systemc/tlm_bridge/gem5_to_tlm.hh b/src/systemc/tlm_bridge/gem5_to_tlm.hh index 23415b843b..35d6ba3b4d 100644 --- a/src/systemc/tlm_bridge/gem5_to_tlm.hh +++ b/src/systemc/tlm_bridge/gem5_to_tlm.hh @@ -61,6 +61,7 @@ #include #include +#include #include "mem/backdoor.hh" #include "mem/port.hh" @@ -173,6 +174,12 @@ class Gem5ToTlmBridge : public Gem5ToTlmBridgeBase */ tlm::tlm_generic_payload *blockingResponse; + /** + * A map to record the association between payload and packet. This helps us + * could get the correct packet when handling nonblocking interfaces. + */ + std::unordered_map packetMap; + gem5::AddrRangeList addrRanges; protected: From 9d1cc1bcc91290aa32253462ea3bc6df1a9d83c5 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 5 Dec 2022 05:03:53 -0800 Subject: [PATCH 041/492] dev: Add an offset checking mechanism to RegisterBank. When adding a long list of registers, it can be easy to miss one which will offset all the registers after it. It can be hard to find those sorts of problems, and tedious and error prone to fix them. This change adds a mechanism to simply annotate what offset a register should have. 
That should also make the register list more self documenting, since you'll be able to easily see what offset a register has from the source without having to count up everything in front of it. Change-Id: Ia7e419ffb062a64a10106305f875cec6f9fe9a80 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66431 Reviewed-by: Yu-hsin Wang Maintainer: Gabe Black Tested-by: kokoro --- src/dev/reg_bank.hh | 70 +++++++++++++++++++++++++++++++++------- src/dev/reg_bank.test.cc | 39 ++++++++++++++++++++++ 2 files changed, 98 insertions(+), 11 deletions(-) diff --git a/src/dev/reg_bank.hh b/src/dev/reg_bank.hh index 42af7bce89..31c0ce5b66 100644 --- a/src/dev/reg_bank.hh +++ b/src/dev/reg_bank.hh @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -84,6 +85,9 @@ * entire device, with the address from accesses passed into read or write * unmodified. * + * The base(), size() and name() methods can be used to access each of those + * read only properties of the RegisterBank instance. + * * To add actual registers to the RegisterBank (discussed below), you can use * either the addRegister method which adds a single register, or addRegisters * which adds an initializer list of them all at once. The register will be @@ -91,8 +95,19 @@ * existing registers. The size of the bank is automatically accumulated as * registers are added. * - * The base(), size() and name() methods can be used to access each of those - * read only properties of the RegisterBank instance. + * When adding a lot of registers, you might accidentally add an extra, + * or accidentally skip one in a long list. Because the offset is handled + * automatically, some of your registers might end up shifted higher or lower + * than you expect. To help mitigate this, you can set what offset you expect + * a register to have by specifying it as an offset, register pair. 
+ * + * addRegisters({{0x1000, reg0}, reg1, reg2}); + * + * If the register would end up at a different offset, gem5 will panic. You + * can also leave off the register if you want to just check the offset, for + * instance between groups of registers. + * + * addRegisters({reg0, reg1, reg2, 0x100c}) * * While the RegisterBank itself doesn't have any data in it directly and so * has no endianness, it's very likely all the registers within it will have @@ -805,19 +820,52 @@ class RegisterBank : public RegisterBankBase virtual ~RegisterBank() {} - void - addRegisters( - std::initializer_list> regs) + class RegisterAdder { - panic_if(regs.size() == 0, "Adding an empty list of registers to %s?", - name()); - for (auto ®: regs) { - _offsetMap.emplace(_base + _size, reg); - _size += reg.get().size(); + private: + std::optional offset; + std::optional reg; + + public: + // Nothing special to do for this register. + RegisterAdder(RegisterBase &new_reg) : reg(&new_reg) {} + // Ensure that this register is added at a particular offset. + RegisterAdder(Addr new_offset, RegisterBase &new_reg) : + offset(new_offset), reg(&new_reg) + {} + // No register, just check that the offset is what we expect. 
+ RegisterAdder(Addr new_offset) : offset(new_offset) {} + + friend class RegisterBank; + }; + + void + addRegisters(std::initializer_list adders) + { + panic_if(std::empty(adders), + "Adding an empty list of registers to %s?", name()); + for (auto &adder: adders) { + const Addr offset = _base + _size; + + if (adder.reg) { + auto *reg = adder.reg.value(); + if (adder.offset && adder.offset.value() != offset) { + panic( + "Expected offset of register %s.%s to be %#x, is %#x.", + name(), reg->name(), adder.offset.value(), offset); + } + _offsetMap.emplace(offset, *reg); + _size += reg->size(); + } else if (adder.offset) { + if (adder.offset.value() != offset) { + panic("Expected current offset of %s to be %#x, is %#x.", + name(), adder.offset.value(), offset); + } + } } } - void addRegister(RegisterBase ®) { addRegisters({reg}); } + void addRegister(RegisterAdder reg) { addRegisters({reg}); } Addr base() const { return _base; } Addr size() const { return _size; } diff --git a/src/dev/reg_bank.test.cc b/src/dev/reg_bank.test.cc index 534f86295b..b4bc969724 100644 --- a/src/dev/reg_bank.test.cc +++ b/src/dev/reg_bank.test.cc @@ -55,6 +55,7 @@ #include +#include "base/gtest/logging.hh" #include "dev/reg_bank.hh" using namespace gem5; @@ -64,6 +65,9 @@ using testing::ElementsAre; // This version is needed with enough elements, empirically more than 10. using testing::ElementsAreArray; +using testing::AllOf; +using testing::HasSubstr; + /* * The RegisterRaz (read as zero) type. 
@@ -1011,6 +1015,41 @@ TEST_F(RegisterBankTest, AddRegistersSize) EXPECT_EQ(emptyBank.size(), 12); } +TEST_F(RegisterBankTest, AddRegistersWithOffsetChecks) +{ + emptyBank.addRegister({0x12345}); + EXPECT_EQ(emptyBank.size(), 0); + emptyBank.addRegister({0x12345, reg0}); + EXPECT_EQ(emptyBank.size(), 4); + emptyBank.addRegister({0x12349}); + EXPECT_EQ(emptyBank.size(), 4); + + emptyBank.addRegisters({{0x12349, reg1}, {0x1234d}, {0x1234d, reg2}}); + EXPECT_EQ(emptyBank.size(), 12); +} + +TEST_F(RegisterBankTest, BadRegisterOffsetDeath) +{ + gtestLogOutput.str(""); + EXPECT_ANY_THROW(emptyBank.addRegisters({{0xabcd, reg0}, reg1})); + + std::string actual = gtestLogOutput.str(); + EXPECT_THAT(actual, HasSubstr("empty.reg0")); + EXPECT_THAT(actual, HasSubstr("to be 0xabcd")); + EXPECT_THAT(actual, HasSubstr("is 0x12345")); +} + +TEST_F(RegisterBankTest, BadBankOffsetDeath) +{ + gtestLogOutput.str(""); + EXPECT_ANY_THROW(emptyBank.addRegisters({{0xabcd}, reg0})); + + std::string actual = gtestLogOutput.str(); + EXPECT_THAT(actual, HasSubstr("empty ")); + EXPECT_THAT(actual, HasSubstr("to be 0xabcd")); + EXPECT_THAT(actual, HasSubstr("is 0x12345")); +} + // Reads. TEST_F(RegisterBankTest, ReadOneAlignedFirst) From ae20719576533e7d049cecccb0c938d39a2e5f58 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Mon, 5 Dec 2022 15:28:00 -0800 Subject: [PATCH 042/492] python: Remove 'scheduleTickExit' in favor of 'exitSimLoop' The commit https://gem5-review.googlesource.com/c/public/gem5/+/66231 added an API to m5 for scheduling to-tick exit events. This added the function `schedule_tick_exit`. It was later pointed out that this `schedule_tick_exit` event is redundant given the existence of `exitSimLoop`. This patch therefore removes `schedule_tick_exit` in favor of `exitSimLoop`. 
Change-Id: Ibecf00b98256a5da2868427d766bdc93f03c3f97 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66451 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Reviewed-by: Andreas Sandberg Tested-by: kokoro Maintainer: Andreas Sandberg --- src/python/m5/simulate.py | 2 +- src/python/pybind11/event.cc | 2 -- src/sim/simulate.cc | 5 ----- src/sim/simulate.hh | 10 ---------- 4 files changed, 1 insertion(+), 18 deletions(-) diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py index 744d95f9f6..18fb1d6cd4 100644 --- a/src/python/m5/simulate.py +++ b/src/python/m5/simulate.py @@ -259,7 +259,7 @@ def scheduleTickExitAbsolute( """ if tick <= curTick(): warn("Tick exit scheduled for the past. This will not be triggered.") - _m5.event.scheduleTickExit(tick=tick, exit_string=exit_string) + _m5.event.exitSimLoop(exit_string, 0, tick, 0, False) def drain(): diff --git a/src/python/pybind11/event.cc b/src/python/pybind11/event.cc index 827768f52f..95e6ddb844 100644 --- a/src/python/pybind11/event.cc +++ b/src/python/pybind11/event.cc @@ -109,8 +109,6 @@ pybind_init_event(py::module_ &m_native) py::arg("ticks") = MaxTick); m.def("setMaxTick", &set_max_tick, py::arg("tick")); m.def("getMaxTick", &get_max_tick, py::return_value_policy::copy); - m.def("scheduleTickExit", &schedule_tick_exit, py::arg("tick"), - py::arg("exit_string")); m.def("terminateEventQueueThreads", &terminateEventQueueThreads); m.def("exitSimLoop", &exitSimLoop); m.def("getEventQueue", []() { return curEventQueue(); }, diff --git a/src/sim/simulate.cc b/src/sim/simulate.cc index f147b3ec77..86d516d39a 100644 --- a/src/sim/simulate.cc +++ b/src/sim/simulate.cc @@ -266,11 +266,6 @@ Tick get_max_tick() return simulate_limit_event->when(); } -void schedule_tick_exit(Tick tick, std::string exit_string) -{ - new GlobalSimLoopExitEvent(tick, exit_string, 0); -} - void terminateEventQueueThreads() { diff --git a/src/sim/simulate.hh b/src/sim/simulate.hh index 
e7c4fa640c..eacf67cec2 100644 --- a/src/sim/simulate.hh +++ b/src/sim/simulate.hh @@ -67,16 +67,6 @@ void set_max_tick(Tick tick); */ Tick get_max_tick(); -/** - * @brief Schedule an exit event at a particular tick. - * - * Schedule a tick with a particular exit string. - * - * @param tick The tick at which the simulation loop should exit. - * @param exit_string The exit string explaining the exit. - */ -void schedule_tick_exit(Tick tick, std::string exit_string); - /** * Terminate helper threads when running in parallel mode. * From 985d9c641f7e72d319a115676c280780e04ed8da Mon Sep 17 00:00:00 2001 From: Yu-hsin Wang Date: Fri, 2 Dec 2022 16:27:16 +0800 Subject: [PATCH 043/492] systemc: replace the deprecated std::iterator std::iterator is deprecated in c++17. We can just declare the required types for iterator traits directly without the helper. Change-Id: I789e2c2b13e56cc391527686109df8b779474d09 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66351 Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- src/systemc/ext/utils/sc_vector.hh | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/systemc/ext/utils/sc_vector.hh b/src/systemc/ext/utils/sc_vector.hh index ef0d7dc24a..c1d9ded0ce 100644 --- a/src/systemc/ext/utils/sc_vector.hh +++ b/src/systemc/ext/utils/sc_vector.hh @@ -49,6 +49,7 @@ #include +#include #include #include #include @@ -259,10 +260,7 @@ class sc_member_access template > -class sc_vector_iter : - public std::iterator, - private AccessPolicy +class sc_vector_iter : private AccessPolicy { private: typedef Element ElementType; @@ -282,8 +280,6 @@ class sc_vector_iter : template friend class sc_vector_iter; - typedef std::iterator - BaseType; typedef sc_vector_iter ThisType; typedef sc_vector VectorType; typedef std::vector StorageType; @@ -315,9 +311,11 @@ class sc_vector_iter : // Conforms to Random Access Iterator category. 
// See ISO/IEC 14882:2003(E), 24.1 [lib.iterator.requirements] - typedef typename BaseType::difference_type difference_type; - typedef typename BaseType::reference reference; - typedef typename BaseType::pointer pointer; + using difference_type = std::ptrdiff_t; + using value_type = typename AccessPolicy::Type; + using reference = typename AccessPolicy::Type &; + using pointer = typename AccessPolicy::Type *; + using iterator_category = std::random_access_iterator_tag; sc_vector_iter() : Policy(), it_() {} From 00a893ad4e2195450090810ee6d3de1c58b12194 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 4 Oct 2022 05:36:22 -0700 Subject: [PATCH 044/492] systemc: Enable DMI in the non-blocking/timing mode bridge. Change-Id: Ia618081e2dbf8b49f62480ac5dc29f87100cd4f1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65754 Maintainer: Gabe Black Tested-by: kokoro Reviewed-by: Gabe Black --- src/systemc/tlm_bridge/tlm_to_gem5.cc | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/systemc/tlm_bridge/tlm_to_gem5.cc b/src/systemc/tlm_bridge/tlm_to_gem5.cc index 47a2fba08e..c02efe7437 100644 --- a/src/systemc/tlm_bridge/tlm_to_gem5.cc +++ b/src/systemc/tlm_bridge/tlm_to_gem5.cc @@ -207,6 +207,29 @@ void TlmToGem5Bridge::sendBeginResp(tlm::tlm_generic_payload &trans, sc_core::sc_time &delay) { + MemBackdoor::Flags flags; + switch (trans.get_command()) { + case tlm::TLM_READ_COMMAND: + flags = MemBackdoor::Readable; + break; + case tlm::TLM_WRITE_COMMAND: + flags = MemBackdoor::Writeable; + break; + default: + panic("TlmToGem5Bridge: " + "received transaction with unsupported command"); + } + Addr start_addr = trans.get_address(); + Addr length = trans.get_data_length(); + + MemBackdoorReq req({start_addr, start_addr + length}, flags); + MemBackdoorPtr backdoor = nullptr; + + bmp.sendMemBackdoorReq(req, backdoor); + + if (backdoor) + trans.set_dmi_allowed(true); + tlm::tlm_phase phase = tlm::BEGIN_RESP; auto 
status = socket->nb_transport_bw(trans, phase, delay); @@ -574,12 +597,12 @@ TlmToGem5Bridge::before_end_of_elaboration() DPRINTF(TlmBridge, "register blocking interface"); socket.register_b_transport( this, &TlmToGem5Bridge::b_transport); - socket.register_get_direct_mem_ptr( - this, &TlmToGem5Bridge::get_direct_mem_ptr); } else { panic("gem5 operates neither in Timing nor in Atomic mode"); } + socket.register_get_direct_mem_ptr( + this, &TlmToGem5Bridge::get_direct_mem_ptr); socket.register_transport_dbg( this, &TlmToGem5Bridge::transport_dbg); From a23641e01fc085fa59528b2fa1c404915bc485bc Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Tue, 6 Dec 2022 10:39:18 -0800 Subject: [PATCH 045/492] configs: Fix x86-gapbs-benchmarks.py example With https://gem5-review.googlesource.com/c/public/gem5/+/64791 we updated the configs/example/gem5_library to utilize the `m5.simulate` module. The GAPBS benchmark example uses the "WORKBEGIN" and "WORKEND" exit events to specify the ROI. The patch incorrectly assumed an "EXIT" exit event were used. As such, the "test-gem5-library-example-x86-gapbs-benchmarks-ALL-x86_64-opt-MESI_Two_Level" test was not properly running, causing the Nightly test to fail: https://jenkins.gem5.org/job/nightly/444. This patch fixes this error. 
Change-Id: I207fe3563c8d9c59bcb79428fe62d2d2bbccd013 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66512 Maintainer: Bobby Bruce Tested-by: kokoro Reviewed-by: Jason Lowe-Power --- configs/example/gem5_library/x86-gapbs-benchmarks.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/configs/example/gem5_library/x86-gapbs-benchmarks.py b/configs/example/gem5_library/x86-gapbs-benchmarks.py index 638d34b599..6ab37479f9 100644 --- a/configs/example/gem5_library/x86-gapbs-benchmarks.py +++ b/configs/example/gem5_library/x86-gapbs-benchmarks.py @@ -211,7 +211,7 @@ board.set_kernel_disk_workload( ) -def handle_exit(): +def handle_workbegin(): print("Done booting Linux") print("Resetting stats at the start of ROI!") m5.stats.reset() @@ -219,6 +219,9 @@ def handle_exit(): start_tick = m5.curTick() processor.switch() yield False # E.g., continue the simulation. + + +def handle_workend(): print("Dump stats at the end of the ROI!") m5.stats.dump() yield True # Stop the simulation. We're done. @@ -227,7 +230,8 @@ def handle_exit(): simulator = Simulator( board=board, on_exit_event={ - ExitEvent.EXIT: handle_exit(), + ExitEvent.WORKBEGIN: handle_workbegin(), + ExitEvent.WORKEND: handle_workend(), }, ) From e81aa1cd860fc4a0c899c3601d098e4ec3eb8464 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Tue, 6 Dec 2022 10:48:48 -0800 Subject: [PATCH 046/492] configs: Alter x86-npb-benchmarks.py to exit after WORKEND While the config script will still function without exiting the SimLoop after the "WORKEND" exit event, there's no need for the simulation to continue beyond this point. 
Change-Id: I60691215e9516fa1eeb8b8502f2bc5a09de2969b Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66513 Tested-by: kokoro Reviewed-by: Jason Lowe-Power Maintainer: Bobby Bruce --- configs/example/gem5_library/x86-npb-benchmarks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configs/example/gem5_library/x86-npb-benchmarks.py b/configs/example/gem5_library/x86-npb-benchmarks.py index 2cb314303f..ff363e449c 100644 --- a/configs/example/gem5_library/x86-npb-benchmarks.py +++ b/configs/example/gem5_library/x86-npb-benchmarks.py @@ -237,12 +237,11 @@ def handle_workbegin(): # marked by `workend`. # We exepect that ROI ends with `workend` or `simulate() limit reached`. -# Otherwise the simulation ended unexpectedly. def handle_workend(): print("Dump stats at the end of the ROI!") m5.stats.dump() - yield False + yield True simulator = Simulator( From bd319560605f1e3eebf828efd7e06206874d6515 Mon Sep 17 00:00:00 2001 From: Jarvis Date: Wed, 7 Dec 2022 11:18:30 -0600 Subject: [PATCH 047/492] tests: Add replacement policy tests Add tests to test the correctness of replacement policies using TrafficGen. Enable debug-flags to print the hit and miss messages so that you can compare the results with the comments in the tests. Even though the tests are targeting specific replacement policies, they can be reused to test all replacement policies. 
Change-Id: I3a8013fbcb19adae25b0818ac9e4b0be60be0689 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/60389 Tested-by: kokoro Maintainer: Daniel Carvalho Reviewed-by: Daniel Carvalho --- tests/gem5/replacement-policies/README | 29 +++++ .../replacement-policies/cache_hierarchies.py | 49 ++++++++ .../replacement-policies/ref/fifo_test1_ld | 13 +++ .../replacement-policies/ref/fifo_test1_st | 13 +++ .../replacement-policies/ref/fifo_test2_ld | 13 +++ .../replacement-policies/ref/fifo_test2_st | 13 +++ .../replacement-policies/ref/lfu_test1_ld | 10 ++ .../replacement-policies/ref/lfu_test1_st | 10 ++ .../replacement-policies/ref/lfu_test2_ld | 13 +++ .../replacement-policies/ref/lfu_test2_st | 13 +++ .../replacement-policies/ref/lfu_test3_ld | 28 +++++ .../replacement-policies/ref/lfu_test3_st | 28 +++++ .../replacement-policies/ref/lip_test1_ld | 18 +++ .../replacement-policies/ref/lip_test1_st | 18 +++ .../replacement-policies/ref/lru_test1_ld | 19 +++ .../replacement-policies/ref/lru_test1_st | 19 +++ .../replacement-policies/ref/lru_test2_ld | 17 +++ .../replacement-policies/ref/lru_test2_st | 17 +++ .../replacement-policies/ref/lru_test3_ld | 13 +++ .../replacement-policies/ref/lru_test3_st | 13 +++ .../replacement-policies/ref/lru_test4_ld | 13 +++ .../replacement-policies/ref/lru_test4_st | 13 +++ .../replacement-policies/ref/mru_test1_ld | 13 +++ .../replacement-policies/ref/mru_test1_st | 13 +++ .../replacement-policies/ref/mru_test2_ld | 14 +++ .../replacement-policies/ref/mru_test2_st | 14 +++ .../replacement-policies/ref/nru_test1_ld | 14 +++ .../replacement-policies/ref/nru_test1_st | 14 +++ .../replacement-policies/ref/rrip_test1_ld | 13 +++ .../replacement-policies/ref/rrip_test1_st | 13 +++ .../replacement-policies/ref/rrip_test2_ld | 16 +++ .../replacement-policies/ref/rrip_test2_st | 16 +++ .../ref/second_chance_test1_ld | 13 +++ .../ref/second_chance_test1_st | 13 +++ .../ref/second_chance_test2_ld | 16 +++ 
.../ref/second_chance_test2_st | 16 +++ .../ref/second_chance_test3_ld | 18 +++ .../ref/second_chance_test3_st | 18 +++ .../ref/tree_plru_test1_ld | 13 +++ .../ref/tree_plru_test1_st | 13 +++ .../ref/tree_plru_test2_ld | 11 ++ .../ref/tree_plru_test2_st | 11 ++ .../ref/tree_plru_test3_ld | 14 +++ .../ref/tree_plru_test3_st | 14 +++ .../run_replacement_policy_test.py | 95 +++++++++++++++ .../test_replacement_policies.py | 110 ++++++++++++++++++ .../traces/fifo_test1_ld.py | 70 +++++++++++ .../traces/fifo_test1_st.py | 70 +++++++++++ .../traces/fifo_test2_ld.py | 71 +++++++++++ .../traces/fifo_test2_st.py | 71 +++++++++++ .../traces/lfu_test1_ld.py | 62 ++++++++++ .../traces/lfu_test1_st.py | 62 ++++++++++ .../traces/lfu_test2_ld.py | 66 +++++++++++ .../traces/lfu_test2_st.py | 66 +++++++++++ .../traces/lfu_test3_ld.py | 87 ++++++++++++++ .../traces/lfu_test3_st.py | 87 ++++++++++++++ .../traces/lip_test1_ld.py | 80 +++++++++++++ .../traces/lip_test1_st.py | 80 +++++++++++++ .../traces/lru_test1_ld.py | 76 ++++++++++++ .../traces/lru_test1_st.py | 76 ++++++++++++ .../traces/lru_test2_ld.py | 71 +++++++++++ .../traces/lru_test2_st.py | 71 +++++++++++ .../traces/lru_test3_ld.py | 62 ++++++++++ .../traces/lru_test3_st.py | 62 ++++++++++ .../traces/lru_test4_ld.py | 63 ++++++++++ .../traces/lru_test4_st.py | 63 ++++++++++ .../traces/mru_test1_ld.py | 63 ++++++++++ .../traces/mru_test1_st.py | 63 ++++++++++ .../traces/mru_test2_ld.py | 66 +++++++++++ .../traces/mru_test2_st.py | 66 +++++++++++ .../traces/nru_test1_ld.py | 71 +++++++++++ .../traces/nru_test1_st.py | 71 +++++++++++ .../traces/rrip_test1_ld.py | 67 +++++++++++ .../traces/rrip_test1_st.py | 67 +++++++++++ .../traces/rrip_test2_ld.py | 71 +++++++++++ .../traces/rrip_test2_st.py | 71 +++++++++++ .../traces/second_chance_test1_ld.py | 65 +++++++++++ .../traces/second_chance_test1_st.py | 65 +++++++++++ .../traces/second_chance_test2_ld.py | 71 +++++++++++ .../traces/second_chance_test2_st.py | 71 +++++++++++ 
.../traces/second_chance_test3_ld.py | 75 ++++++++++++ .../traces/second_chance_test3_st.py | 75 ++++++++++++ .../traces/tree_plru_test1_ld.py | 65 +++++++++++ .../traces/tree_plru_test1_st.py | 65 +++++++++++ .../traces/tree_plru_test2_ld.py | 61 ++++++++++ .../traces/tree_plru_test2_st.py | 61 ++++++++++ .../traces/tree_plru_test3_ld.py | 67 +++++++++++ .../traces/tree_plru_test3_st.py | 67 +++++++++++ 88 files changed, 3807 insertions(+) create mode 100644 tests/gem5/replacement-policies/README create mode 100644 tests/gem5/replacement-policies/cache_hierarchies.py create mode 100644 tests/gem5/replacement-policies/ref/fifo_test1_ld create mode 100644 tests/gem5/replacement-policies/ref/fifo_test1_st create mode 100644 tests/gem5/replacement-policies/ref/fifo_test2_ld create mode 100644 tests/gem5/replacement-policies/ref/fifo_test2_st create mode 100644 tests/gem5/replacement-policies/ref/lfu_test1_ld create mode 100644 tests/gem5/replacement-policies/ref/lfu_test1_st create mode 100644 tests/gem5/replacement-policies/ref/lfu_test2_ld create mode 100644 tests/gem5/replacement-policies/ref/lfu_test2_st create mode 100644 tests/gem5/replacement-policies/ref/lfu_test3_ld create mode 100644 tests/gem5/replacement-policies/ref/lfu_test3_st create mode 100644 tests/gem5/replacement-policies/ref/lip_test1_ld create mode 100644 tests/gem5/replacement-policies/ref/lip_test1_st create mode 100644 tests/gem5/replacement-policies/ref/lru_test1_ld create mode 100644 tests/gem5/replacement-policies/ref/lru_test1_st create mode 100644 tests/gem5/replacement-policies/ref/lru_test2_ld create mode 100644 tests/gem5/replacement-policies/ref/lru_test2_st create mode 100644 tests/gem5/replacement-policies/ref/lru_test3_ld create mode 100644 tests/gem5/replacement-policies/ref/lru_test3_st create mode 100644 tests/gem5/replacement-policies/ref/lru_test4_ld create mode 100644 tests/gem5/replacement-policies/ref/lru_test4_st create mode 100644 
tests/gem5/replacement-policies/ref/mru_test1_ld create mode 100644 tests/gem5/replacement-policies/ref/mru_test1_st create mode 100644 tests/gem5/replacement-policies/ref/mru_test2_ld create mode 100644 tests/gem5/replacement-policies/ref/mru_test2_st create mode 100644 tests/gem5/replacement-policies/ref/nru_test1_ld create mode 100644 tests/gem5/replacement-policies/ref/nru_test1_st create mode 100644 tests/gem5/replacement-policies/ref/rrip_test1_ld create mode 100644 tests/gem5/replacement-policies/ref/rrip_test1_st create mode 100644 tests/gem5/replacement-policies/ref/rrip_test2_ld create mode 100644 tests/gem5/replacement-policies/ref/rrip_test2_st create mode 100644 tests/gem5/replacement-policies/ref/second_chance_test1_ld create mode 100644 tests/gem5/replacement-policies/ref/second_chance_test1_st create mode 100644 tests/gem5/replacement-policies/ref/second_chance_test2_ld create mode 100644 tests/gem5/replacement-policies/ref/second_chance_test2_st create mode 100644 tests/gem5/replacement-policies/ref/second_chance_test3_ld create mode 100644 tests/gem5/replacement-policies/ref/second_chance_test3_st create mode 100644 tests/gem5/replacement-policies/ref/tree_plru_test1_ld create mode 100644 tests/gem5/replacement-policies/ref/tree_plru_test1_st create mode 100644 tests/gem5/replacement-policies/ref/tree_plru_test2_ld create mode 100644 tests/gem5/replacement-policies/ref/tree_plru_test2_st create mode 100644 tests/gem5/replacement-policies/ref/tree_plru_test3_ld create mode 100644 tests/gem5/replacement-policies/ref/tree_plru_test3_st create mode 100644 tests/gem5/replacement-policies/run_replacement_policy_test.py create mode 100644 tests/gem5/replacement-policies/test_replacement_policies.py create mode 100644 tests/gem5/replacement-policies/traces/fifo_test1_ld.py create mode 100644 tests/gem5/replacement-policies/traces/fifo_test1_st.py create mode 100644 tests/gem5/replacement-policies/traces/fifo_test2_ld.py create mode 100644 
tests/gem5/replacement-policies/traces/fifo_test2_st.py create mode 100644 tests/gem5/replacement-policies/traces/lfu_test1_ld.py create mode 100644 tests/gem5/replacement-policies/traces/lfu_test1_st.py create mode 100644 tests/gem5/replacement-policies/traces/lfu_test2_ld.py create mode 100644 tests/gem5/replacement-policies/traces/lfu_test2_st.py create mode 100644 tests/gem5/replacement-policies/traces/lfu_test3_ld.py create mode 100644 tests/gem5/replacement-policies/traces/lfu_test3_st.py create mode 100644 tests/gem5/replacement-policies/traces/lip_test1_ld.py create mode 100644 tests/gem5/replacement-policies/traces/lip_test1_st.py create mode 100644 tests/gem5/replacement-policies/traces/lru_test1_ld.py create mode 100644 tests/gem5/replacement-policies/traces/lru_test1_st.py create mode 100644 tests/gem5/replacement-policies/traces/lru_test2_ld.py create mode 100644 tests/gem5/replacement-policies/traces/lru_test2_st.py create mode 100644 tests/gem5/replacement-policies/traces/lru_test3_ld.py create mode 100644 tests/gem5/replacement-policies/traces/lru_test3_st.py create mode 100644 tests/gem5/replacement-policies/traces/lru_test4_ld.py create mode 100644 tests/gem5/replacement-policies/traces/lru_test4_st.py create mode 100644 tests/gem5/replacement-policies/traces/mru_test1_ld.py create mode 100644 tests/gem5/replacement-policies/traces/mru_test1_st.py create mode 100644 tests/gem5/replacement-policies/traces/mru_test2_ld.py create mode 100644 tests/gem5/replacement-policies/traces/mru_test2_st.py create mode 100644 tests/gem5/replacement-policies/traces/nru_test1_ld.py create mode 100644 tests/gem5/replacement-policies/traces/nru_test1_st.py create mode 100644 tests/gem5/replacement-policies/traces/rrip_test1_ld.py create mode 100644 tests/gem5/replacement-policies/traces/rrip_test1_st.py create mode 100644 tests/gem5/replacement-policies/traces/rrip_test2_ld.py create mode 100644 tests/gem5/replacement-policies/traces/rrip_test2_st.py create mode 
100644 tests/gem5/replacement-policies/traces/second_chance_test1_ld.py create mode 100644 tests/gem5/replacement-policies/traces/second_chance_test1_st.py create mode 100644 tests/gem5/replacement-policies/traces/second_chance_test2_ld.py create mode 100644 tests/gem5/replacement-policies/traces/second_chance_test2_st.py create mode 100644 tests/gem5/replacement-policies/traces/second_chance_test3_ld.py create mode 100644 tests/gem5/replacement-policies/traces/second_chance_test3_st.py create mode 100644 tests/gem5/replacement-policies/traces/tree_plru_test1_ld.py create mode 100644 tests/gem5/replacement-policies/traces/tree_plru_test1_st.py create mode 100644 tests/gem5/replacement-policies/traces/tree_plru_test2_ld.py create mode 100644 tests/gem5/replacement-policies/traces/tree_plru_test2_st.py create mode 100644 tests/gem5/replacement-policies/traces/tree_plru_test3_ld.py create mode 100644 tests/gem5/replacement-policies/traces/tree_plru_test3_st.py diff --git a/tests/gem5/replacement-policies/README b/tests/gem5/replacement-policies/README new file mode 100644 index 0000000000..626d42e9c9 --- /dev/null +++ b/tests/gem5/replacement-policies/README @@ -0,0 +1,29 @@ +There are two separate files targeting loads and stores for each test. +In each test file, each letter in the comments represents a 64-byte address +range. For example, A represents the address from 0 to 63, B represents +the address from 64 to 127, C represents the address from 128 to 191, and so on. +If you enable debug flags to print the hits and misses information, you +can compare the results with your expectation. This test can be used to +test the correctness of the replacement policy. The first block will +always get an eviction. 
+ +The test files should use the traffic generator format + +To emulate 4-way 1-set cache with implementation of 4-way 2-set cache, +we will use A, C, E, G, I, K, M, O instead of A, B, C, D, E, F, G, H, +so they will never get to the second set + +The linear traffic generator has + + + +Addresses are expressed as decimal numbers. The period in the linear +and random state is from a uniform random distribution over the +interval. If a specific value is desired, then the min and max can +be set to the same value. + +The duration (in ticks) is calculated by the time needed for accessing a cache +line * (the number of cache lines accessed in this state + 1). For example, to +access address 0 to address 1023, the duration should be 510000 ticks, which is +calculated by 510000 = 30000 * (1024/64 + 1). Please note that 30000 is assumed +to be the number of ticks in one period here. diff --git a/tests/gem5/replacement-policies/cache_hierarchies.py b/tests/gem5/replacement-policies/cache_hierarchies.py new file mode 100644 index 0000000000..6177dd4ac9 --- /dev/null +++ b/tests/gem5/replacement-policies/cache_hierarchies.py @@ -0,0 +1,49 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# Copyright (c) 2022 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from typing import Type + +from gem5.utils.override import overrides +from gem5.components.cachehierarchies.ruby.mi_example_cache_hierarchy import ( + MIExampleCacheHierarchy, +) +from gem5.components.boards.abstract_board import AbstractBoard +from m5.objects.ReplacementPolicies import BaseReplacementPolicy + + +class ModMIExampleCacheHierarchy(MIExampleCacheHierarchy): + def __init__(self, replacement_policy_class: Type[BaseReplacementPolicy]): + super().__init__(size="512B", assoc="4") + self._replacement_policy_class = replacement_policy_class + + @overrides(MIExampleCacheHierarchy) + def incorporate_cache(self, board: AbstractBoard) -> None: + super().incorporate_cache(board) + for controller in self._controllers: + controller.cacheMemory.replacement_policy = ( + self._replacement_policy_class() + ) diff --git a/tests/gem5/replacement-policies/ref/fifo_test1_ld b/tests/gem5/replacement-policies/ref/fifo_test1_ld new file mode 100644 index 0000000000..264fcf6008 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/fifo_test1_ld @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/fifo_test1_st b/tests/gem5/replacement-policies/ref/fifo_test1_st new file mode 100644 index 0000000000..264fcf6008 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/fifo_test1_st @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/fifo_test2_ld b/tests/gem5/replacement-policies/ref/fifo_test2_ld new file mode 100644 index 0000000000..08a91b696e --- /dev/null +++ b/tests/gem5/replacement-policies/ref/fifo_test2_ld @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/fifo_test2_st b/tests/gem5/replacement-policies/ref/fifo_test2_st new file mode 100644 index 0000000000..08a91b696e --- /dev/null +++ b/tests/gem5/replacement-policies/ref/fifo_test2_st @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/lfu_test1_ld b/tests/gem5/replacement-policies/ref/lfu_test1_ld new file mode 100644 index 0000000000..edc9399c9d --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lfu_test1_ld @@ -0,0 +1,10 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] +. diff --git a/tests/gem5/replacement-policies/ref/lfu_test1_st b/tests/gem5/replacement-policies/ref/lfu_test1_st new file mode 100644 index 0000000000..edc9399c9d --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lfu_test1_st @@ -0,0 +1,10 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! + 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] +. 
diff --git a/tests/gem5/replacement-policies/ref/lfu_test2_ld b/tests/gem5/replacement-policies/ref/lfu_test2_ld new file mode 100644 index 0000000000..5a3618ce82 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lfu_test2_ld @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! + 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] +. diff --git a/tests/gem5/replacement-policies/ref/lfu_test2_st b/tests/gem5/replacement-policies/ref/lfu_test2_st new file mode 100644 index 0000000000..5a3618ce82 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lfu_test2_st @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] +. diff --git a/tests/gem5/replacement-policies/ref/lfu_test3_ld b/tests/gem5/replacement-policies/ref/lfu_test3_ld new file mode 100644 index 0000000000..24cdc80257 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lfu_test3_ld @@ -0,0 +1,28 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 91000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 211000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 374000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 434000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 691000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 919000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 931000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 991000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] +1051000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] +1159000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] +1219000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] 
+1231000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +1291000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +1351000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +1459000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] +1519000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] +. diff --git a/tests/gem5/replacement-policies/ref/lfu_test3_st b/tests/gem5/replacement-policies/ref/lfu_test3_st new file mode 100644 index 0000000000..24cdc80257 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lfu_test3_st @@ -0,0 +1,28 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! + 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 91000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 211000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 374000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 434000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 691000: 
system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 919000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 931000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 991000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] +1051000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] +1159000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] +1219000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] +1231000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +1291000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +1351000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +1459000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] +1519000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] +. diff --git a/tests/gem5/replacement-policies/ref/lip_test1_ld b/tests/gem5/replacement-policies/ref/lip_test1_ld new file mode 100644 index 0000000000..2b895a2476 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lip_test1_ld @@ -0,0 +1,18 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 691000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 919000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] +. diff --git a/tests/gem5/replacement-policies/ref/lip_test1_st b/tests/gem5/replacement-policies/ref/lip_test1_st new file mode 100644 index 0000000000..2b895a2476 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lip_test1_st @@ -0,0 +1,18 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 691000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 919000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] +. diff --git a/tests/gem5/replacement-policies/ref/lru_test1_ld b/tests/gem5/replacement-policies/ref/lru_test1_ld new file mode 100644 index 0000000000..57688e4b03 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lru_test1_ld @@ -0,0 +1,19 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 799000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 919000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 979000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] +. diff --git a/tests/gem5/replacement-policies/ref/lru_test1_st b/tests/gem5/replacement-policies/ref/lru_test1_st new file mode 100644 index 0000000000..57688e4b03 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lru_test1_st @@ -0,0 +1,19 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 799000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 919000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 979000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] +. diff --git a/tests/gem5/replacement-policies/ref/lru_test2_ld b/tests/gem5/replacement-policies/ref/lru_test2_ld new file mode 100644 index 0000000000..d077d93a3b --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lru_test2_ld @@ -0,0 +1,17 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 811000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/lru_test2_st b/tests/gem5/replacement-policies/ref/lru_test2_st new file mode 100644 index 0000000000..d077d93a3b --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lru_test2_st @@ -0,0 +1,17 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 811000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/lru_test3_ld b/tests/gem5/replacement-policies/ref/lru_test3_ld new file mode 100644 index 0000000000..4a5252f612 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lru_test3_ld @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] +. diff --git a/tests/gem5/replacement-policies/ref/lru_test3_st b/tests/gem5/replacement-policies/ref/lru_test3_st new file mode 100644 index 0000000000..4a5252f612 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lru_test3_st @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] +. diff --git a/tests/gem5/replacement-policies/ref/lru_test4_ld b/tests/gem5/replacement-policies/ref/lru_test4_ld new file mode 100644 index 0000000000..93509a9c11 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lru_test4_ld @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] +. diff --git a/tests/gem5/replacement-policies/ref/lru_test4_st b/tests/gem5/replacement-policies/ref/lru_test4_st new file mode 100644 index 0000000000..93509a9c11 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/lru_test4_st @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] +. diff --git a/tests/gem5/replacement-policies/ref/mru_test1_ld b/tests/gem5/replacement-policies/ref/mru_test1_ld new file mode 100644 index 0000000000..ff596b6627 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/mru_test1_ld @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] +. diff --git a/tests/gem5/replacement-policies/ref/mru_test1_st b/tests/gem5/replacement-policies/ref/mru_test1_st new file mode 100644 index 0000000000..ff596b6627 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/mru_test1_st @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] +. diff --git a/tests/gem5/replacement-policies/ref/mru_test2_ld b/tests/gem5/replacement-policies/ref/mru_test2_ld new file mode 100644 index 0000000000..f1d0fa1b63 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/mru_test2_ld @@ -0,0 +1,14 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/mru_test2_st b/tests/gem5/replacement-policies/ref/mru_test2_st new file mode 100644 index 0000000000..f1d0fa1b63 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/mru_test2_st @@ -0,0 +1,14 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/nru_test1_ld b/tests/gem5/replacement-policies/ref/nru_test1_ld new file mode 100644 index 0000000000..7245bc30c9 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/nru_test1_ld @@ -0,0 +1,14 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/nru_test1_st b/tests/gem5/replacement-policies/ref/nru_test1_st new file mode 100644 index 0000000000..7245bc30c9 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/nru_test1_st @@ -0,0 +1,14 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/rrip_test1_ld b/tests/gem5/replacement-policies/ref/rrip_test1_ld new file mode 100644 index 0000000000..c8d4b70917 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/rrip_test1_ld @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] +. diff --git a/tests/gem5/replacement-policies/ref/rrip_test1_st b/tests/gem5/replacement-policies/ref/rrip_test1_st new file mode 100644 index 0000000000..c8d4b70917 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/rrip_test1_st @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] +. diff --git a/tests/gem5/replacement-policies/ref/rrip_test2_ld b/tests/gem5/replacement-policies/ref/rrip_test2_ld new file mode 100644 index 0000000000..e2c8c83028 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/rrip_test2_ld @@ -0,0 +1,16 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 91000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 151000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 374000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 494000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] +. diff --git a/tests/gem5/replacement-policies/ref/rrip_test2_st b/tests/gem5/replacement-policies/ref/rrip_test2_st new file mode 100644 index 0000000000..e2c8c83028 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/rrip_test2_st @@ -0,0 +1,16 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 91000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 151000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 374000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 494000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] +. diff --git a/tests/gem5/replacement-policies/ref/second_chance_test1_ld b/tests/gem5/replacement-policies/ref/second_chance_test1_ld new file mode 100644 index 0000000000..96d56df962 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/second_chance_test1_ld @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] +. diff --git a/tests/gem5/replacement-policies/ref/second_chance_test1_st b/tests/gem5/replacement-policies/ref/second_chance_test1_st new file mode 100644 index 0000000000..96d56df962 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/second_chance_test1_st @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] +. diff --git a/tests/gem5/replacement-policies/ref/second_chance_test2_ld b/tests/gem5/replacement-policies/ref/second_chance_test2_ld new file mode 100644 index 0000000000..c9ab1abd81 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/second_chance_test2_ld @@ -0,0 +1,16 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 799000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/second_chance_test2_st b/tests/gem5/replacement-policies/ref/second_chance_test2_st new file mode 100644 index 0000000000..c9ab1abd81 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/second_chance_test2_st @@ -0,0 +1,16 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 799000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/second_chance_test3_ld b/tests/gem5/replacement-policies/ref/second_chance_test3_ld new file mode 100644 index 0000000000..f860d5cd14 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/second_chance_test3_ld @@ -0,0 +1,18 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 871000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/second_chance_test3_st b/tests/gem5/replacement-policies/ref/second_chance_test3_st new file mode 100644 index 0000000000..f860d5cd14 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/second_chance_test3_st @@ -0,0 +1,18 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] + 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 871000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180] +. diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test1_ld b/tests/gem5/replacement-policies/ref/tree_plru_test1_ld new file mode 100644 index 0000000000..6a94bac187 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/tree_plru_test1_ld @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] +. diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test1_st b/tests/gem5/replacement-policies/ref/tree_plru_test1_st new file mode 100644 index 0000000000..6a94bac187 --- /dev/null +++ b/tests/gem5/replacement-policies/ref/tree_plru_test1_st @@ -0,0 +1,13 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300] + 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] +. diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test2_ld b/tests/gem5/replacement-policies/ref/tree_plru_test2_ld new file mode 100644 index 0000000000..5ac1f3506c --- /dev/null +++ b/tests/gem5/replacement-policies/ref/tree_plru_test2_ld @@ -0,0 +1,11 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] +. diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test2_st b/tests/gem5/replacement-policies/ref/tree_plru_test2_st new file mode 100644 index 0000000000..5ac1f3506c --- /dev/null +++ b/tests/gem5/replacement-policies/ref/tree_plru_test2_st @@ -0,0 +1,11 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! + 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] +. 
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test3_ld b/tests/gem5/replacement-policies/ref/tree_plru_test3_ld new file mode 100644 index 0000000000..83662e51ea --- /dev/null +++ b/tests/gem5/replacement-policies/ref/tree_plru_test3_ld @@ -0,0 +1,14 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! + 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] +. diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test3_st b/tests/gem5/replacement-policies/ref/tree_plru_test3_st new file mode 100644 index 0000000000..83662e51ea --- /dev/null +++ b/tests/gem5/replacement-policies/ref/tree_plru_test3_st @@ -0,0 +1,14 @@ +Global frequency set at 1000000000000 ticks per second +Beginning simulation! 
+ 74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80] + 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100] + 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180] + 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200] + 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280] + 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] + 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100] + 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0] + 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80] +. diff --git a/tests/gem5/replacement-policies/run_replacement_policy_test.py b/tests/gem5/replacement-policies/run_replacement_policy_test.py new file mode 100644 index 0000000000..10061094b4 --- /dev/null +++ b/tests/gem5/replacement-policies/run_replacement_policy_test.py @@ -0,0 +1,95 @@ +# Copyright (c) 2022 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import argparse +from importlib.machinery import SourceFileLoader + +from cache_hierarchies import ModMIExampleCacheHierarchy + +import m5 + +from m5.debug import flags +from m5.objects import Root +from gem5.components.boards.test_board import TestBoard +from gem5.components.memory.simple import SingleChannelSimpleMemory +from gem5.components.processors.complex_generator import ComplexGenerator + +argparser = argparse.ArgumentParser() + +argparser.add_argument( + "config_name", + type=str, + help="Name of the python file " + "including the defintion of a python generator and " + "importing the right replacement policy. The python " + "generator should only assume one positional argument " + "and be named python_generator. The replacement policy" + " should be imported as rp.", +) +argparser.add_argument( + "config_path", + type=str, + help="Path to the python file" "specified by config_name.", +) + +args = argparser.parse_args() + +module = SourceFileLoader(args.config_name, args.config_path).load_module() +python_generator = module.python_generator +rp_class = module.rp + +flags["RubyHitMiss"].enable() + +cache_hierarchy = ModMIExampleCacheHierarchy(rp_class) + +memory = SingleChannelSimpleMemory( + latency="30ns", + latency_var="0ns", + bandwidth="12.8GiB/s", + size="512MiB", +) + +generator = ComplexGenerator() +generator.set_traffic_from_python_generator(python_generator) + +# We use the Test Board. This is a special board to run traffic generation +# tasks +motherboard = TestBoard( + clk_freq="1GHz", + generator=generator, # We pass the traffic generator as the processor. 
+ memory=memory, + cache_hierarchy=cache_hierarchy, +) +root = Root(full_system=False, system=motherboard) + +m5.instantiate() + +generator.start_traffic() +print("Beginning simulation!") +exit_event = m5.simulate() +print( + "Exiting @ tick {} because {}.".format(m5.curTick(), exit_event.getCause()) +) diff --git a/tests/gem5/replacement-policies/test_replacement_policies.py b/tests/gem5/replacement-policies/test_replacement_policies.py new file mode 100644 index 0000000000..3a30c0a070 --- /dev/null +++ b/tests/gem5/replacement-policies/test_replacement_policies.py @@ -0,0 +1,110 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# Copyright (c) 2022 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os + +from testlib import * + + +def test_replacement_policy(config_name: str, config_path: str) -> None: + name = f"test-replacement-policy-{config_name}" + + verifiers = ( + verifier.MatchStdoutNoPerf(joinpath(getcwd(), "ref", config_name[7:])), + ) + gem5_verify_config( + name=name, + fixtures=(), + verifiers=verifiers, + config=joinpath( + config.base_dir, + "tests", + "gem5", + "replacement-policies", + "run_replacement_policy_test.py", + ), + config_args=[config_name, config_path], + valid_isas=(constants.null_tag,), + protocol="MI_example", + valid_hosts=constants.supported_hosts, + length=constants.quick_tag, + ) + + +def create_replacement_policy_tests(traces): + this_dir = os.path.dirname(__file__) + for trace in traces: + config_name = trace.split(".")[0] + config_path = os.path.join(this_dir, trace) + test_replacement_policy(config_name, config_path) + + +traces = [ + "traces/fifo_test1_ld.py", + "traces/fifo_test2_ld.py", + "traces/lru_test3_ld.py", + "traces/lru_test4_ld.py", + "traces/lfu_test1_ld.py", + "traces/lfu_test2_ld.py", + "traces/lfu_test3_ld.py", + "traces/lip_test1_ld.py", + "traces/lru_test1_ld.py", + "traces/lru_test2_ld.py", + "traces/mru_test1_ld.py", + "traces/mru_test2_ld.py", + "traces/nru_test1_ld.py", + "traces/rrip_test1_ld.py", + "traces/rrip_test2_ld.py", + "traces/second_chance_test1_ld.py", + "traces/second_chance_test2_ld.py", + "traces/second_chance_test3_ld.py", + 
"traces/tree_plru_test1_ld.py", + "traces/tree_plru_test2_ld.py", + "traces/tree_plru_test3_ld.py", + "traces/fifo_test1_st.py", + "traces/fifo_test2_st.py", + "traces/lru_test3_st.py", + "traces/lru_test4_st.py", + "traces/lfu_test1_st.py", + "traces/lfu_test2_st.py", + "traces/lfu_test3_st.py", + "traces/lip_test1_st.py", + "traces/lru_test1_st.py", + "traces/lru_test2_st.py", + "traces/mru_test1_st.py", + "traces/mru_test2_st.py", + "traces/nru_test1_st.py", + "traces/rrip_test1_st.py", + "traces/rrip_test2_st.py", + "traces/second_chance_test1_st.py", + "traces/second_chance_test2_st.py", + "traces/second_chance_test3_st.py", + "traces/tree_plru_test1_st.py", + "traces/tree_plru_test2_st.py", + "traces/tree_plru_test3_st.py", +] +create_replacement_policy_tests(traces) diff --git a/tests/gem5/replacement-policies/traces/fifo_test1_ld.py b/tests/gem5/replacement-policies/traces/fifo_test1_ld.py new file mode 100644 index 0000000000..80e573fb0f --- /dev/null +++ b/tests/gem5/replacement-policies/traces/fifo_test1_ld.py @@ -0,0 +1,70 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, I, K, A, C, E, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B with FIFO replacement policy, you will observe: +# m, m, m, m, m, m, m, m, m, m, where 'm' means miss, and 'h' means hit. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores +# ([A*, C, E, G],[ , , ,]), A marked * as the next entry to be evicted. +# I misses, searches for a victim and selects A. +# Now the cache stores ([I, C*, E, G],[ , , ,]). +# K misses, searches for a victim and selects C. +# Now the cache stores ([I, K, E*, G],[ , , ,]). +# A misses, searches for a victim and selects E. +# Now the cache stores ([I, K, A, G*],[ , , ,]). +# C misses, searches for a victim and selects G. +# Now the cache stores ([I*, K, A, C],[ , , ,]). +# E misses, searches for a victim and selects I. +# Now the cache stores ([E, K*, A, C],[ , , ,]). +# G misses, searches for a victim and selects K. 
+# Now the cache stores ([E, G, A*, C],[ , , ,]). + +from m5.objects.ReplacementPolicies import FIFORP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/fifo_test1_st.py b/tests/gem5/replacement-policies/traces/fifo_test1_st.py new file mode 100644 index 0000000000..7fdb63dd37 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/fifo_test1_st.py @@ -0,0 +1,70 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, I, K, A, C, E, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B with FIFO replacement policy, you will observe: +# m, m, m, m, m, m, m, m, m, m, where 'm' means miss, and 'h' means hit. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores +# ([A*, C, E, G],[ , , ,]), A marked * as the next entry to be evicted. +# I misses, searches for a victim and selects A. +# Now the cache stores ([I, C*, E, G],[ , , ,]). +# K misses, searches for a victim and selects C. +# Now the cache stores ([I, K, E*, G],[ , , ,]). +# A misses, searches for a victim and selects E. +# Now the cache stores ([I, K, A, G*],[ , , ,]). +# C misses, searches for a victim and selects G. +# Now the cache stores ([I*, K, A, C],[ , , ,]). +# E misses, searches for a victim and selects I. +# Now the cache stores ([E, K*, A, C],[ , , ,]). +# G misses, searches for a victim and selects K. +# Now the cache stores ([E, G, A*, C],[ , , ,]). + +from m5.objects.ReplacementPolicies import FIFORP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/fifo_test2_ld.py b/tests/gem5/replacement-policies/traces/fifo_test2_ld.py new file mode 100644 index 0000000000..5f95ad7814 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/fifo_test2_ld.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & 
Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, A, C, I, K, E, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. 
+# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B with FIFO replacement policy, you will observe: +# m, m, m, m, h, h, m, m, h, h +# where 'm' means miss, and 'h' means hit. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores +# ([A*, C, E, G],[ , , ,]), A marked * as the next entry to be evicted. +# A hits. +# Now the cache stores ([A*, C, E, G],[ , , ,]). +# C hits. +# Now the cache stores ([A*, C, E, G],[ , , ,]). +# I misses, searches for a victim and selects A. +# Now the cache stores ([I, C*, E, G],[ , , ,]). +# K misses, searches for a victim and selects C. +# Now the cache stores ([I, K, E*, G],[ , , ,]). +# E hits. +# Now the cache stores ([I, K, E*, G],[ , , ,]). +# G hits +# Now the cache stores ([I, K, E*, G],[ , , ,]). + +from m5.objects.ReplacementPolicies import FIFORP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/fifo_test2_st.py b/tests/gem5/replacement-policies/traces/fifo_test2_st.py new file mode 100644 index 
0000000000..71866d890f --- /dev/null +++ b/tests/gem5/replacement-policies/traces/fifo_test2_st.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores. +# Access pattern: A, C, E, G, A, C, I, K, E, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. 
+# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B with FIFO replacement policy, you will observe: +# m, m, m, m, h, h, m, m, h, h +# where 'm' means miss, and 'h' means hit. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores +# ([A*, C, E, G],[ , , ,]), A marked * as the next entry to be evicted. +# A hits. +# Now the cache stores ([A*, C, E, G],[ , , ,]). +# C hits. +# Now the cache stores ([A*, C, E, G],[ , , ,]). +# I misses, searches for a victim and selects A. +# Now the cache stores ([I, C*, E, G],[ , , ,]). +# K misses, searches for a victim and selects C. +# Now the cache stores ([I, K, E*, G],[ , , ,]). +# E hits. +# Now the cache stores ([I, K, E*, G],[ , , ,]). +# G hits +# Now the cache stores ([I, K, E*, G],[ , , ,]). + +from m5.objects.ReplacementPolicies import FIFORP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lfu_test1_ld.py b/tests/gem5/replacement-policies/traces/lfu_test1_ld.py new file mode 100644 index 0000000000..2a88ad3182 --- /dev/null +++ 
b/tests/gem5/replacement-policies/traces/lfu_test1_ld.py @@ -0,0 +1,62 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, I, A, I +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. 
+# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B, with LFU replacement policy, you will observe: +# m, m, m, m, m, m, m, where 'm' means miss, and 'h' means hit. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores +# ([A1, C1, E1, G1],[ , , ,]). The number after each letter +# is the count for the accesses for the address range. +# I searches for a victim and selects A since it has the least count. +# Now the cache stores ([I1, C1, E1, G1],[ , , ,]). +# A searches for a victim and selects I since it has the least count. +# Now the cache stores ([A1, C1, E1, G1],[ , , ,]). +# I searches for a victim and selects A since it has the least count. +# Now the cache stores ([I1, C1, E1, G1],[ , , ,]). + +from m5.objects.ReplacementPolicies import LFURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lfu_test1_st.py b/tests/gem5/replacement-policies/traces/lfu_test1_st.py new file mode 100644 index 0000000000..a2c945eee2 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lfu_test1_st.py @@ -0,0 +1,62 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, I, A, I +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B, with LFU replacement policy, you will observe: +# m, m, m, m, m, m, m, where 'm' means miss, and 'h' means hit. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores +# ([A1, C1, E1, G1],[ , , ,]). The number after each letter +# is the count for the accesses for the address range. +# I searches for a victim and selects A since it has the least count. +# Now the cache stores ([I1, C1, E1, G1],[ , , ,]). +# A searches for a victim and selects I since it has the least count. +# Now the cache stores ([A1, C1, E1, G1],[ , , ,]). +# I searches for a victim and selects A since it has the least count. +# Now the cache stores ([I1, C1, E1, G1],[ , , ,]). + +from m5.objects.ReplacementPolicies import LFURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lfu_test2_ld.py b/tests/gem5/replacement-policies/traces/lfu_test2_ld.py new file mode 100644 index 0000000000..901d813199 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lfu_test2_ld.py @@ -0,0 +1,66 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, A, I, K, M, O, A +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B, with LFU replacement policy, you will observe: +# m, m, m, m, h, m, m, m, m, h where 'm' means miss, and 'h' means +# hit. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores +# ([A1, C1, E1, G1],[ , , ,]). The number after each letter +# is the count for the accesses for the address range. +# A is a hit, and the cache now stores ([A2, C1, E1, G1],[ , , ,]). +# I searches for a victim and selects C. Now the cache stores ([A2, I1, E1, G1],[ , , ,]). +# K searches for a victim and selects I. Now the cache stores ([A2, K1, E1, G1],[ , , ,]). +# M searches for a victim and selects K. Now the cache stores ([A2, M1, E1, G1],[ , , ,]). +# O searches for a victim and selects M. Now the cache stores ([A2, O1, E1, G1],[ , , ,]). +# A hits. + +from m5.objects.ReplacementPolicies import LFURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lfu_test2_st.py b/tests/gem5/replacement-policies/traces/lfu_test2_st.py new file mode 100644 index 0000000000..415362614b --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lfu_test2_st.py @@ -0,0 +1,66 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, A, I, K, M, O, A +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B, with LFU replacement policy, you will observe: +# m, m, m, m, h, m, m, m, m, h where 'm' means miss, and 'h' means +# hit. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores +# ([A1, C1, E1, G1],[ , , ,]). The number after each letter +# is the count for the accesses for the address range. +# A is a hit, and the cache now stores ([A2, C1, E1, G1],[ , , ,]). +# I searches for a victim and selects C. Now the cache stores ([A2, I1, E1, G1],[ , , ,]). +# K searches for a victim and selects I. Now the cache stores ([A2, K1, E1, G1],[ , , ,]). +# M searches for a victim and selects K. Now the cache stores ([A2, M1, E1, G1],[ , , ,]). +# O searches for a victim and selects M. Now the cache stores ([A2, O1, E1, G1],[ , , ,]). +# A hits. + +from m5.objects.ReplacementPolicies import LFURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lfu_test3_ld.py b/tests/gem5/replacement-policies/traces/lfu_test3_ld.py new file mode 100644 index 0000000000..a4a59ade64 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lfu_test3_ld.py @@ -0,0 +1,87 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, A, C, C, C, E, G, G, I, E, E, E, E, K, +# A, A, A, A, M, G, G, G, G, O, C +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. 
+# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B, with LFU replacement policy, you will observe: +# m, h, m, h, h, m, m, h, m, m, h, h, h, m, m, h, h, h, m, m, h, h, +# h, m, m, where 'm' means miss, and 'h' means hit. + +# Explanation of this result: +# After two A accesses, three C accesses, one E access and two G accesses, +# the cache stores ([A2,C3,E1,G2],[ , , ,]). The numbers after each letter are the +# counts of accesses to that address range. +# I searches for a victim and selects E. Now the cache stores ([A2,C3,I1,G2],[ , , ,]). +# E searches for a victim and selects I. Now the cache stores ([A2,C3,E1,G2],[ , , ,]). +# Three E accesses are hits. Now the cache stores ([A2,C3,E4,G2],[ , , ,]). +# K searches for a victim and selects A. Now the cache stores ([K1,C3,E4,G2],[ , , ,]). +# A searches for a victim and selects K. Now the cache stores ([A1,C3,E4,G2],[ , , ,]). +# Three A accesses are hits. Now the cache stores ([A4,C3,E4,G2],[ , , ,]). +# M searches for a victim and selects G. Now the cache stores ([A4,C3,E4,M1],[ , , ,]). +# G searches for a victim and selects M. Now the cache stores ([A4,C3,E4,G1],[ , , ,]). +# Three G accesses are hits. Now the cache stores ([A4,C3,E4,G4],[ , , ,]). +# O searches for a victim and selects C. Now the cache stores ([A4,O1,E4,G4],[ , , ,]). +# C searches for a victim and selects O. Now the cache stores ([A4,C1,E4,G4],[ , , ,]). 
+ +from m5.objects.ReplacementPolicies import LFURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + 
+ yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lfu_test3_st.py b/tests/gem5/replacement-policies/traces/lfu_test3_st.py new file mode 100644 index 0000000000..45e2ee2bcd --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lfu_test3_st.py @@ -0,0 +1,87 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +# This test is targeting stores +# Access pattern: A, A, C, C, C, E, G, G, I, E, E, E, E, K, +# A, A, A, A, M, G, G, G, G, O, C +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B, with LFU replacement policy, you will observe: +# m, h, m, h, h, m, m, h, m, m, h, h, h, m, m, h, h, h, m, m, h, h, +# h, m, m, where 'm' means miss, and 'h' means hit. + +# Explanation of this result: +# After two A accesses, three C accesses, one E access and two G accesses, +# the cache stores ([A2,C3,E1,G2],[ , , ,]). The numbers after each letter are the +# counts of accesses to that address range. +# I searches for a victim and selects E. Now the cache stores ([A2,C3,I1,G2],[ , , ,]). +# E searches for a victim and selects I. Now the cache stores ([A2,C3,E1,G2],[ , , ,]). +# Three E accesses are hits. Now the cache stores ([A2,C3,E4,G2],[ , , ,]). +# K searches for a victim and selects A. Now the cache stores ([K1,C3,E4,G2],[ , , ,]). +# A searches for a victim and selects K. Now the cache stores ([A1,C3,E4,G2],[ , , ,]). +# Three A accesses are hits. Now the cache stores ([A4,C3,E4,G2],[ , , ,]). +# M searches for a victim and selects G. Now the cache stores ([A4,C3,E4,M1],[ , , ,]). +# G searches for a victim and selects M. Now the cache stores ([A4,C3,E4,G1],[ , , ,]). +# Three G accesses are hits. Now the cache stores ([A4,C3,E4,G4],[ , , ,]). +# O searches for a victim and selects C. Now the cache stores ([A4,O1,E4,G4],[ , , ,]). +# C searches for a victim and selects O. Now the cache stores ([A4,C1,E4,G4],[ , , ,]). 
+ +from m5.objects.ReplacementPolicies import LFURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git 
a/tests/gem5/replacement-policies/traces/lip_test1_ld.py b/tests/gem5/replacement-policies/traces/lip_test1_ld.py new file mode 100644 index 0000000000..c7d70a93f1 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lip_test1_ld.py @@ -0,0 +1,80 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, I, K, M, O, A, C, E, G, A, I, C +# Each letter represents a 64-byte address range. 
+ +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B, with LIP replacement policy, +# you will observe: m,m,m,m,m,m,m,m,m,h,h,h,h,m,m +# where 'h' means hit and 'm' means miss. + +# Explanation of this result: +# A,C,E,G are misses, now the cache stores ([A, C, E, G],[ , , ,]) +# I searches for a victim and selects A. +# Now the cache stores ([I, C, E, G],[ , , ,]) +# K searches for a victim and selects I. +# Now the cache stores ([K, C, E, G],[ , , ,]) +# M searches for a victim and selects K. +# Now the cache stores ([M, C, E, G],[ , , ,]) +# O searches for a victim and selects M. +# Now the cache stores ([O, C, E, G],[ , , ,]) +# A searches for a victim and selects O. +# Now the cache stores ([A, C, E, G],[ , , ,]) +# C hits --> C now MRU +# E hits --> E now MRU +# G hits --> G now MRU +# A hits --> A now MRU +# I searches for a victim and selects C since A is MRU. +# Now the cache stores ([A, I, E, G],[ , , ,]). 
+# C misses + +from m5.objects.ReplacementPolicies import LIPRP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lip_test1_st.py b/tests/gem5/replacement-policies/traces/lip_test1_st.py new file mode 100644 index 0000000000..e64db8f12e --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lip_test1_st.py @@ -0,0 +1,80 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, I, K, M, O, A, C, E, G, A, I, C +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B, with LIP replacement policy, +# you will observe: m,m,m,m,m,m,m,m,m,h,h,h,h,m,m +# where 'h' means hit and 'm' means miss. 
+ +# Explanation of this result: +# A,C,E,G are misses, now the cache stores ([A, C, E, G],[ , , ,]) +# I searches for a victim and selects A. +# Now the cache stores ([I, C, E, G],[ , , ,]) +# K searches for a victim and selects I. +# Now the cache stores ([K, C, E, G],[ , , ,]) +# M searches for a victim and selects K. +# Now the cache stores ([M, C, E, G],[ , , ,]) +# O searches for a victim and selects M. +# Now the cache stores ([O, C, E, G],[ , , ,]) +# A searches for a victim and selects O. +# Now the cache stores ([A, C, E, G],[ , , ,]) +# C hits --> C now MRU +# E hits --> E now MRU +# G hits --> G now MRU +# A hits --> A now MRU +# I searches for a victim and selects C since A is MRU. +# Now the cache stores ([A, I, E, G],[ , , ,]). +# C misses + +from m5.objects.ReplacementPolicies import LIPRP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) 
+ + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lru_test1_ld.py b/tests/gem5/replacement-policies/traces/lru_test1_ld.py new file mode 100644 index 0000000000..41b874805f --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lru_test1_ld.py @@ -0,0 +1,76 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. 
+# Access pattern: A, C, E, G, A, C, I, K, M, O, A, C, I, K, M, O +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. With LRU replacement policy, you will observe: +# m, m, m, m, h, h, m, m, m, m, m, m, m, m, m, m, where 'h' means hit and +# 'm' means miss. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# A is marked as the LRU address range. +# A, C are hits, and the cache stores ([A, C, E*, G],[ , , ,]). +# I searches for a victim and selects E. Now the cache stores ([A, C, I, G*],[ , , ,]). +# K searches for a victim and selects G. Now the cache stores ([A*, C, I, K],[ , , ,]). +# M searches for a victim and selects A. Now the cache stores ([M, C*, I, K],[ , , ,]). +# O searches for a victim and selects C. Now the cache stores ([M, O, I*, K],[ , , ,]). +# A searches for a victim and selects I. Now the cache stores ([M, O, A, K*],[ , , ,]). +# C searches for a victim and selects K. Now the cache stores ([M*, O, A, C],[ , , ,]). +# I searches for a victim and selects M. Now the cache stores ([I, O*, A, C],[ , , ,]). +# K searches for a victim and selects O. Now the cache stores ([I, K, A*, C],[ , , ,]). +# M searches for a victim and selects A. Now the cache stores ([I, K, M, C*],[ , , ,]). +# O searches for a victim and selects C. Now the cache stores ([I*, K, M, O],[ , , ,]). 
+ +from m5.objects.ReplacementPolicies import LRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lru_test1_st.py b/tests/gem5/replacement-policies/traces/lru_test1_st.py new file mode 100644 index 0000000000..20a3594102 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lru_test1_st.py @@ -0,0 +1,76 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, A, C, I, K, M, O, A, C, I, K, M, O +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. 
With LRU replacement policy, you will observe: +# m, m, m, m, h, h, m, m, m, m, m, m, m, m, m, m, where 'h' means hit and +# 'm' means miss. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# A is marked as the LRU address range. +# A, C are hits, and the cache stores ([A, C, E*, G],[ , , ,]). +# I searches for a victim and selects E. Now the cache stores ([A, C, I, G*],[ , , ,]). +# K searches for a victim and selects G. Now the cache stores ([A*, C, I, K],[ , , ,]). +# M searches for a victim and selects A. Now the cache stores ([M, C*, I, K],[ , , ,]). +# O searches for a victim and selects C. Now the cache stores ([M, O, I*, K],[ , , ,]). +# A searches for a victim and selects I. Now the cache stores ([M, O, A, K*],[ , , ,]). +# C searches for a victim and selects K. Now the cache stores ([M*, O, A, C],[ , , ,]). +# I searches for a victim and selects M. Now the cache stores ([I, O*, A, C],[ , , ,]). +# K searches for a victim and selects O. Now the cache stores ([I, K, A*, C],[ , , ,]). +# M searches for a victim and selects A. Now the cache stores ([I, K, M, C*],[ , , ,]). +# O searches for a victim and selects C. Now the cache stores ([I*, K, M, O],[ , , ,]). 
+ +from m5.objects.ReplacementPolicies import LRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lru_test2_ld.py b/tests/gem5/replacement-policies/traces/lru_test2_ld.py new file mode 100644 index 0000000000..24c8a54010 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lru_test2_ld.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, A, C, I, K, M, O, E, G, E, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. 
With LRU replacement policy, you will observe: +# m, m, m, m, h, h, m, m, m, m, m, m, h, h, where 'h' means hit and 'm' +# means miss. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# A is marked as the LRU address range. +# A, C are hits, and the cache stores ([A, C, E*, G],[ , , ,]). +# I searches for a victim and selects E. Now the cache stores ([A, C, I, G*],[ , , ,]). +# K searches for a victim and selects G. Now the cache stores ([A*, C, I, K],[ , , ,]). +# M searches for a victim and selects A. Now the cache stores ([M, C*, I, K],[ , , ,]). +# O searches for a victim and selects C. Now the cache stores ([M, O, I*, K],[ , , ,]). +# E searches for a victim and selects I. Now the cache stores ([M, O, E, K*],[ , , ,]). +# G searches for a victim and selects K. Now the cache stores ([M*, O, E, G],[ , , ,]). +# E,G are hits. + +from m5.objects.ReplacementPolicies import LRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 
447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lru_test2_st.py b/tests/gem5/replacement-policies/traces/lru_test2_st.py new file mode 100644 index 0000000000..49dfdfb5db --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lru_test2_st.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +# This test is targeting stores +# Access pattern: A, C, E, G, A, C, I, K, M, O, E, G, E, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. With LRU replacement policy, you will observe: +# m, m, m, m, h, h, m, m, m, m, m, m, h, h, where 'h' means hit and 'm' +# means miss. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# A is marked as the LRU address range. +# A, C are hits, and the cache stores ([A, C, E*, G],[ , , ,]). +# I searches for a victim and selects E. Now the cache stores ([A, C, I, G*],[ , , ,]). +# K searches for a victim and selects G. Now the cache stores ([A*, C, I, K],[ , , ,]). +# M searches for a victim and selects A. Now the cache stores ([M, C*, I, K],[ , , ,]). +# O searches for a victim and selects C. Now the cache stores ([M, O, I*, K],[ , , ,]). +# E searches for a victim and selects I. Now the cache stores ([M, O, E, K*],[ , , ,]). +# G searches for a victim and selects K. Now the cache stores ([M*, O, E, G],[ , , ,]). +# E,G are hits. 
+ +from m5.objects.ReplacementPolicies import LRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lru_test3_ld.py b/tests/gem5/replacement-policies/traces/lru_test3_ld.py new file mode 100644 index 0000000000..da817e0272 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lru_test3_ld.py @@ -0,0 +1,62 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, A, C, E, G, I, A +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B with LRU replacement policy, you will observe: +# m, m, m, m, h, h, h, h, m, m, where 'h' means +# hit and 'm' means miss. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# A is marked as the LRU address range. +# A, C, E, G then hit, and the cache stores ([A*, C, E, G],[ , , ,]). +# I searches for a victim and selects A. Now the cache stores ([I, C*, E, G],[ , , ,]). +# A searches for a victim and selects C. Now the cache stores ([I, A, E*, G],[ , , ,]). + +from m5.objects.ReplacementPolicies import LRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lru_test3_st.py b/tests/gem5/replacement-policies/traces/lru_test3_st.py new file mode 100644 index 0000000000..57c7fbe020 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lru_test3_st.py @@ -0,0 +1,62 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, A, C, E, G, I, A +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B with LRU replacement policy, you will observe: +# m, m, m, m, h, h, h, h, m, m, where 'h' means +# hit and 'm' means miss. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# A is marked as the LRU address range. +# A, C, E, G then hit, and the cache stores ([A*, C, E, G],[ , , ,]). +# I searches for a victim and selects A. Now the cache stores ([I, C*, E, G],[ , , ,]). +# A searches for a victim and selects C. Now the cache stores ([I, A, E*, G],[ , , ,]). + +from m5.objects.ReplacementPolicies import LRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lru_test4_ld.py b/tests/gem5/replacement-policies/traces/lru_test4_ld.py new file mode 100644 index 0000000000..c6624ed47d --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lru_test4_ld.py @@ -0,0 +1,63 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, E, G, A, C, I, E +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B with LRU replacement policy, you will observe: +# m, m, m, m, h, h, h, h, m, m, where 'h' means +# hit and 'm' means miss. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# A is marked as the LRU address range. +# E and G then hits, and the cache stores ([A*, C, E, G],[ , , ,]). +# A and C then hits, and the cache stores ([A, C, E*, G],[ , , ,]). +# I searches for a victim and selects E. Now the cache stores ([A, C, I, G*],[ , , ,]). +# E searches for a victim and selects G. Now the cache stores ([A*, C, I, E],[ , , ,]). + +from m5.objects.ReplacementPolicies import LRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/lru_test4_st.py b/tests/gem5/replacement-policies/traces/lru_test4_st.py new file mode 100644 index 0000000000..3b3c26b803 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/lru_test4_st.py @@ -0,0 +1,63 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, E, G, A, C, I, E +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, and each cache +# line is 64B with LRU replacement policy, you will observe: +# m, m, m, m, h, h, h, h, m, m, where 'h' means +# hit and 'm' means miss. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# A is marked as the LRU address range. +# E and G then hits, and the cache stores ([A*, C, E, G],[ , , ,]). +# A and C then hits, and the cache stores ([A, C, E*, G],[ , , ,]). +# I searches for a victim and selects E. Now the cache stores ([A, C, I, G*],[ , , ,]). +# E searches for a victim and selects G. Now the cache stores ([A*, C, I, E],[ , , ,]). + +from m5.objects.ReplacementPolicies import LRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/mru_test1_ld.py b/tests/gem5/replacement-policies/traces/mru_test1_ld.py new file mode 100644 index 0000000000..d93695a3c1 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/mru_test1_ld.py @@ -0,0 +1,63 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, I, K, M, O, A, C, +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. With MRU replacement policy, you will observe: +# m, m, m, m, m, m, m, m, h, h, where 'h' means hit and 'm' means miss. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A, C, E, G*],[ , , ,]). +# G is marked as the MRU address range. +# I searches for a victim and selects G. Now the cache stores ([A, C, E, I*],[ , , ,]). +# K searches for a victim and selects I. Now the cache stores ([A, C, E, K*],[ , , ,]). +# M searches for a victim and selects K. Now the cache stores ([A, C, E, M*],[ , , ,]). +# O searches for a victim and selects M. Now the cache stores ([A, C, E, O*],[ , , ,]). +# A,C are hits. + +from m5.objects.ReplacementPolicies import MRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/mru_test1_st.py b/tests/gem5/replacement-policies/traces/mru_test1_st.py new file mode 100644 index 0000000000..1b48ff6024 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/mru_test1_st.py @@ -0,0 +1,63 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, I, K, M, O, A, C, +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. With MRU replacement policy, you will observe: +# m, m, m, m, m, m, m, m, h, h, where 'h' means hit and 'm' means miss. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A, C, E, G*],[ , , ,]). +# G is marked as the MRU address range. +# I searches for a victim and selects G. Now the cache stores ([A, C, E, I*],[ , , ,]). +# K searches for a victim and selects I. Now the cache stores ([A, C, E, K*],[ , , ,]). +# M searches for a victim and selects K. Now the cache stores ([A, C, E, M*],[ , , ,]). +# O searches for a victim and selects M. Now the cache stores ([A, C, E, O*],[ , , ,]). +# A,C are hits. + +from m5.objects.ReplacementPolicies import MRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/mru_test2_ld.py b/tests/gem5/replacement-policies/traces/mru_test2_ld.py new file mode 100644 index 0000000000..4f5bcbcbd5 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/mru_test2_ld.py @@ -0,0 +1,66 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, A, I, K, M, O, A, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. With MRU replacement policy, you will observe: +# m, m, m, m, h, m, m, m, m, m, h where 'h' means hit and 'm' means miss. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A, C, E, G*],[ , , ,]). +# G is marked as the MRU address range. +# A is a hit, now the cache stores ([A*, C, E, G],[ , , ,]). +# I searches for a victim and selects A. Now the cache stores ([I*, C, E, G],[ , , ,]). +# K searches for a victim and selects I. Now the cache stores ([K*, C, E, G],[ , , ,]). +# M searches for a victim and selects K. Now the cache stores ([M*, C, E, G],[ , , ,]). +# O searches for a victim and selects M. Now the cache stores ([O*, C, E, G],[ , , ,]). +# A searches for a victim and selects O. Now the cache stores ([A*, C, E, G],[ , , ,]). +# G is a hit. + +from m5.objects.ReplacementPolicies import MRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/mru_test2_st.py b/tests/gem5/replacement-policies/traces/mru_test2_st.py new file mode 100644 index 0000000000..7ffbde09e2 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/mru_test2_st.py @@ -0,0 +1,66 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & 
Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores. +# Access pattern: A, C, E, G, A, I, K, M, O, A, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. 
With MRU replacement policy, you will observe: +# m, m, m, m, h, m, m, m, m, m, h where 'h' means hit and 'm' means miss. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A, C, E, G*],[ , , ,]). +# G is marked as the MRU address range. +# A is a hit, now the cache stores ([A*, C, E, G],[ , , ,]). +# I searches for a victim and selects A. Now the cache stores ([I*, C, E, G],[ , , ,]). +# K searches for a victim and selects I. Now the cache stores ([K*, C, E, G],[ , , ,]). +# M searches for a victim and selects K. Now the cache stores ([M*, C, E, G],[ , , ,]). +# O searches for a victim and selects M. Now the cache stores ([O*, C, E, G],[ , , ,]). +# A searches for a victim and selects O. Now the cache stores ([A*, C, E, G],[ , , ,]). +# G is a hit. + +from m5.objects.ReplacementPolicies import MRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/nru_test1_ld.py b/tests/gem5/replacement-policies/traces/nru_test1_ld.py new file mode 100644 index 0000000000..f3b89fcc7b --- /dev/null +++ 
b/tests/gem5/replacement-policies/traces/nru_test1_ld.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, A, I, A, E, K, E, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. 
+# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. +# This test can be used to test the correctness of NRU +# replacement policy. The NRU replacement policy will always find +# the victim block from the left side of a cache. More specifically, +# with NRU replacement policy, you will observe: +# m, m, m, m, h, m, m, h, m, h, m, where 'm' means miss, and 'h' means +# hit. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A0, C0, E0, G0],[ , , ,]). +# The number following each letter is the NRU bit for the address range. +# A is a hit. Now the cache stores ([A0, C0, E0, G0],[ , , ,]). +# I searches for a victim and selects A. Now the cache stores ([I0, C1, E1, G1],[ , , ,]). +# A searches for a victim and selects C. Now the cache stores ([I0, A0, E1, G1],[ , , ,]). +# E hits. Now the cache stores ([I0, A0, E0, G1],[ , , ,]). +# K searches for a victim and selects G. Now the cache stores ([I0, A0, E0, K0],[ , , ,]). +# E hits. Now the cache stores ([I0, A0, E0, K0],[ , , ,]). +# G searches for a victim and selects I. Now the cache stores ([G0, A1, E1, K1],[ , , ,]).
+ +from m5.objects.ReplacementPolicies import NRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/nru_test1_st.py b/tests/gem5/replacement-policies/traces/nru_test1_st.py new file mode 100644 index 0000000000..c5b8738c83 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/nru_test1_st.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, A, I, A, E, K, E, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. +# This test can be used to test the correctness of NRU +# replacement policy. 
The NRU replacement policy will always find +# the victim block from the left side of a cache. More specifically, +# with NRU replacement policy, you will observe: +# m, m, m, m, h, m, m, h, m, h, m, where 'm' means miss, and 'h' means +# hit. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A0, C0, E0, G0],[ , , ,]). +# The number following each letter is the NRU bit for the address range. +# A is a hit. Now the cache stores ([A0, C0, E0, G0],[ , , ,]). +# I searches for a victim and selects A. Now the cache stores ([I0, C1, E1, G1],[ , , ,]). +# A searches for a victim and selects C. Now the cache stores ([I0, A0, E1, G1],[ , , ,]). +# E hits. Now the cache stores ([I0, A0, E0, G1],[ , , ,]). +# K searches for a victim and selects G. Now the cache stores ([I0, A0, E0, K0],[ , , ,]). +# E hits. Now the cache stores ([I0, A0, E0, K0],[ , , ,]). +# G searches for a victim and selects I. Now the cache stores ([G0, A1, E1, K1],[ , , ,]). + +from m5.objects.ReplacementPolicies import NRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git
a/tests/gem5/replacement-policies/traces/rrip_test1_ld.py b/tests/gem5/replacement-policies/traces/rrip_test1_ld.py new file mode 100644 index 0000000000..e6a674c6d8 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/rrip_test1_ld.py @@ -0,0 +1,67 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, A, I, K, M, C, A +# Each letter represents a 64-byte address range. 
+ +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. This test can be used to test the correctness of RRIP +# replacement policy. You will observe: m, m, m, m, h, m, m, m, m, m, +# where 'm' means miss, and 'h' means hit. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A2, C2, E2, G2],[ , , ,]). +# The number following each letter is the RRPV for that address range. +# A is a hit, now the cache stores ([A1, C2, E2, G2],[ , , ,]). +# I searches for a victim and selects C, a block with the highest RRPV. Since C +# is not saturated, A, E and G will be increased by 1, +# so the cache now stores ([A2, I2, E3, G3],[ , , ,]). +# K searches for a victim and selects E. Now it stores ([A2, I2, K2, G3],[ , , ,]). +# M searches for a victim and selects G. Now it stores ([A2, I2, K2, M2],[ , , ,]). +# C searches for a victim and selects A. Now it stores ([C2, I3, K3, M3],[ , , ,]). +# A searches for a victim and selects I. Now it stores ([C2, A2, K3, M3],[ , , ,]).
+ +from m5.objects.ReplacementPolicies import RRIPRP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/rrip_test1_st.py b/tests/gem5/replacement-policies/traces/rrip_test1_st.py new file mode 100644 index 0000000000..702e2a1bc7 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/rrip_test1_st.py @@ -0,0 +1,67 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, A, I, K, M, C, A +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. This test can be used to test the correctness of RRIP +# replacement policy. You will observe: m, m, m, m, h, m, m, m, m, m, +# where 'm' means miss, and 'h' means hit. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A2, C2, E2, G2],[ , , ,]). +# The number following each letter is the RRPV for that address range. +# A is a hit, now the cache stores ([A1, C2, E2, G2],[ , , ,]). +# I searches for a victim and selects C, a block with the highest RRPV. Since C +# is not saturated, A, E and G will be increased by 1, +# so the cache now stores ([A2, I2, E3, G3],[ , , ,]). +# K searches for a victim and selects E. Now it stores ([A2, I2, K2, G3],[ , , ,]). +# M searches for a victim and selects G. Now it stores ([A2, I2, K2, M2],[ , , ,]). +# C searches for a victim and selects A.
Now it stores ([C2, I3, K3, M3],[ , , ,]). +# A searches for a victim and selects I. Now it stores ([C2, A2, K3, M3],[ , , ,]). + +from m5.objects.ReplacementPolicies import RRIPRP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/rrip_test2_ld.py b/tests/gem5/replacement-policies/traces/rrip_test2_ld.py new file mode 100644 index 0000000000..b9f2ee026e --- /dev/null +++ b/tests/gem5/replacement-policies/traces/rrip_test2_ld.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +# This test is targeting loads. +# Access pattern: A, A, A, C, C, E, E, G, I, K, M, O, A +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. This test can be used to test the correctness +# of RRIP replacement policy. 
More specifically, with RRIP replacement +# policy, you will observe: m, h, h, m, h, m, h, m, m, m, m, m, h +# where 'm' means miss, and 'h' means hit. + +# Explanation of this result: +# After three A access, two C accesses, 2 E accesses and one G access, +# the cache stores ([A0, C1, E1, G2],[ , , ,]). +# The number following each letter is the RRPV for that address range. +# I searches a victim and selects G. Now the cache stores ([A1, C2, E2, I2],[ , , ,]). +# K searches a victim and selects C. Now the cache stores ([A2, K2, E3, I3],[ , , ,]). +# M searches a victim and selects E. Now the cache stores ([A2, K2, M2, I3],[ , , ,]). +# O searches a victim and selects I. NOW the cache stores ([A2, K2, M2, O2],[ , , ,]). +# A hits. + +from m5.objects.ReplacementPolicies import RRIPRP as rp + + +def python_generator(generator): + + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/rrip_test2_st.py 
b/tests/gem5/replacement-policies/traces/rrip_test2_st.py new file mode 100644 index 0000000000..be23756a95 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/rrip_test2_st.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +# This test is targeting stores +# Access pattern: A, A, A, C, C, E, E, G, I, K, M, O, A +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. 
+# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. This test can be used to test the correctness +# of RRIP replacement policy. More specifically, with RRIP replacement +# policy, you will observe: m, h, h, m, h, m, h, m, m, m, m, m, h +# where 'm' means miss, and 'h' means hit. + +# Explanation of this result: +# After three A access, two C accesses, 2 E accesses and one G access, +# the cache stores ([A0, C1, E1, G2],[ , , ,]). +# The number following each letter is the RRPV for that address range. +# I searches a victim and selects G. Now the cache stores ([A1, C2, E2, I2],[ , , ,]). +# K searches a victim and selects C. Now the cache stores ([A2, K2, E3, I3],[ , , ,]). +# M searches a victim and selects E. Now the cache stores ([A2, K2, M2, I3],[ , , ,]). +# O searches a victim and selects I. NOW the cache stores ([A2, K2, M2, O2],[ , , ,]). +# A hits. + +from m5.objects.ReplacementPolicies import RRIPRP as rp + + +def python_generator(generator): + + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 
30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/second_chance_test1_ld.py b/tests/gem5/replacement-policies/traces/second_chance_test1_ld.py new file mode 100644 index 0000000000..2c527d84c6 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/second_chance_test1_ld.py @@ -0,0 +1,65 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, A, C, I, K, A, C +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. This test can be used to test the correctness of Second Chance +# replacement policy. The Second Chance replacement policy will keep the block +# 'A' and 'C' in the cache because of the second chance bit. More specifically, +# with Second Chance replacement policy, you will observe: +# m, m, m, m, h, h, m, m, h, h, where 'm' means miss, and 'h' means hit. + +# Explanation of the result: +# The number after each letter is the second chance bit, which would be set after a re-reference. +# A, C, E, G are misses. The cache stores ([A0, C0, E0, G0],[ , , ,]). +# A, C are hit. Now the cache stores ([A1, C1, E0, G0],[ , , ,]). +# I searches a victim and selects E. Now the cache stores ([A0, C0, I0, G0],[ , , ,]). +# K searches a victim and selects G. Now the cache stores ([A0, C0, I0, K0],[ , , ,]). +# A, C are hits. 
+ +from m5.objects.ReplacementPolicies import SecondChanceRP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/second_chance_test1_st.py b/tests/gem5/replacement-policies/traces/second_chance_test1_st.py new file mode 100644 index 0000000000..bf04697be5 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/second_chance_test1_st.py @@ -0,0 +1,65 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, A, C, I, K, A, C +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. This test can be used to test the correctness of Second Chance +# replacement policy. 
The Second Chance replacement policy will keep the block +# 'A' and 'C' in the cache because of the second chance bit. More specifically, +# with Second Chance replacement policy, you will observe: +# m, m, m, m, h, h, m, m, h, h, where 'm' means miss, and 'h' means hit. + +# Explanation of the result: +# The number after each letter is the second chance bit, which would be set after a re-reference. +# A, C, E, G are misses. The cache stores ([A0, C0, E0, G0],[ , , ,]). +# A, C are hit. Now the cache stores ([A1, C1, E0, G0],[ , , ,]). +# I searches a victim and selects E. Now the cache stores ([A0, C0, I0, G0],[ , , ,]). +# K searches a victim and selects G. Now the cache stores ([A0, C0, I0, K0],[ , , ,]). +# A, C are hits. + +from m5.objects.ReplacementPolicies import SecondChanceRP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/second_chance_test2_ld.py b/tests/gem5/replacement-policies/traces/second_chance_test2_ld.py new file mode 100644 index 0000000000..d187cbec3f --- /dev/null +++ b/tests/gem5/replacement-policies/traces/second_chance_test2_ld.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt
Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# access pattern: A, C, E, G, A, C, E, G, I, A, C, E, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. 
This test can be used to test the correctness of Second Chance +# replacement policy. In this test every block is re-referenced before 'I' arrives, +# so all second chance bits are set; the victim search clears them and evicts 'A'. More specifically, +# with Second Chance replacement policy, you will observe: +# m, m, m, m, h, h, h, h, m, m, m, m, m where 'm' means miss, and 'h' means hit. + +# Explanation of the result: +# A, C, E, G are misses. The cache stores ([A0, C0, E0, G0],[ , , ,]). +# The number after each letter is the second chance bit, which would be set after a re-reference. +# A, C, E, G are hit. Now the cache stores ([A1, C1, E1, G1],[ , , ,]). +# I searches a victim and selects A. Now the cache stores ([I0, C0, E0, G0],[ , , ,]). +# A searches a victim and selects C. Now the cache stores ([I0, A0, E0, G0],[ , , ,]). +# C searches a victim and selects E. Now the cache stores ([I0, A0, C0, G0],[ , , ,]). +# E searches a victim and selects G. Now the cache stores ([I0, A0, C0, E0],[ , , ,]). +# G searches a victim and selects I. Now the cache stores ([G0, A0, C0, E0],[ , , ,]).
+ +from m5.objects.ReplacementPolicies import SecondChanceRP as rp + + +def python_generator(generator): + + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/second_chance_test2_st.py b/tests/gem5/replacement-policies/traces/second_chance_test2_st.py new file mode 100644 index 0000000000..477f31d43c --- /dev/null +++ b/tests/gem5/replacement-policies/traces/second_chance_test2_st.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores. +# access pattern: A, C, E, G, A, C, E, G, I, A, C, E, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. This test can be used to test the correctness of Second Chance +# replacement policy. 
In this test every block is re-referenced before 'I' arrives, +# so all second chance bits are set; the victim search clears them and evicts 'A'. More specifically, +# with Second Chance replacement policy, you will observe: +# m, m, m, m, h, h, h, h, m, m, m, m, m where 'm' means miss, and 'h' means hit. + +# Explanation of the result: +# A, C, E, G are misses. The cache stores ([A0, C0, E0, G0],[ , , ,]). +# The number after each letter is the second chance bit, which would be set after a re-reference. +# A, C, E, G are hit. Now the cache stores ([A1, C1, E1, G1],[ , , ,]). +# I searches a victim and selects A. Now the cache stores ([I0, C0, E0, G0],[ , , ,]). +# A searches a victim and selects C. Now the cache stores ([I0, A0, E0, G0],[ , , ,]). +# C searches a victim and selects E. Now the cache stores ([I0, A0, C0, G0],[ , , ,]). +# E searches a victim and selects G. Now the cache stores ([I0, A0, C0, E0],[ , , ,]). +# G searches a victim and selects I. Now the cache stores ([G0, A0, C0, E0],[ , , ,]). + + +from m5.objects.ReplacementPolicies import SecondChanceRP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000,
384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/second_chance_test3_ld.py b/tests/gem5/replacement-policies/traces/second_chance_test3_ld.py new file mode 100644 index 0000000000..ee50de6747 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/second_chance_test3_ld.py @@ -0,0 +1,75 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# access pattern: A, C, E, G, A, C, E, G, E, I, A, C, G, E, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. This test can be used to test the correctness of Second Chance +# replacement policy. In this test 'G' is re-referenced after the second chance bits were cleared, +# which sets its bit again, so 'G' stays in the cache and 'I' is evicted instead. More specifically, +# with Second Chance replacement policy, you will observe: +# m, m, m, m, h, h, h, h, h, m, m, m, h, m, h where 'm' means miss, and 'h' means hit. + +# Explanation of the result: +# A, C, E, G are misses. The cache stores ([A0, C0, E0, G0],[ , , ,]). +# The number after each letter is the second chance bit, which would be set after a re-reference. +# A, C, E, G are hit. Now the cache stores ([A1, C1, E1, G1],[ , , ,]). +# E hits. +# I searches a victim and selects A. Now the cache stores ([I0, C0, E0, G0],[ , , ,]). +# A searches a victim and selects C. Now the cache stores ([I0, A0, E0, G0],[ , , ,]). +# C searches a victim and selects E. Now the cache stores ([I0, A0, C0, G0],[ , , ,]). +# G hits. +# E searches a victim and selects I. Now the cache stores ([E0, A0, C0, G0],[ , , ,]).
+# G hits + + +from m5.objects.ReplacementPolicies import SecondChanceRP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/second_chance_test3_st.py b/tests/gem5/replacement-policies/traces/second_chance_test3_st.py new file mode 100644 index 0000000000..53dcbffe89 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/second_chance_test3_st.py @@ -0,0 +1,75 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores. +# access pattern: A, C, E, G, A, C, E, G, E, I, A, C, G, E, G +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B. This test can be used to test the correctness of Second Chance +# replacement policy.
The Second Chance replacement policy will keep the block +# 'A' and 'C' in the cache because of the second chance bit. More specifically, +# with Second Chance replacement policy, you will observe: +# m, m, m, m, h, h, h, h, h, m, m, m, h, m, h where 'm' means miss, and 'h' means hit. + +# Explanation of the result: +# A, C, E, G are misses. The cache stores ([A0, C0, E0, G0],[ , , ,]). +# The number after each letter is the second chance bit, which would be set after a re-reference. +# A, C, E, G are hit. Now the cache stores ([A1, C1, E1, G1],[ , , ,]). +# E hits. +# I searches a victim and selects A. Now the cache stores ([I0, C0, E0, G0],[ , , ,]). +# A searches a victim and selects C. Now the cache stores ([I0, A0, E0, G0],[ , , ,]). +# C searches a victim and selects E. Now the cache stores ([I0, A0, C0, G0],[ , , ,]). +# G hits. +# E searches a victim and selects I. Now the cache stores ([E0, A0, C0, G0],[ , , ,]). +# G hits + +from m5.objects.ReplacementPolicies import SecondChanceRP as rp + + +def python_generator(generator): + + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield 
generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test1_ld.py b/tests/gem5/replacement-policies/traces/tree_plru_test1_ld.py new file mode 100644 index 0000000000..419ce019fb --- /dev/null +++ b/tests/gem5/replacement-policies/traces/tree_plru_test1_ld.py @@ -0,0 +1,65 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, A, I, K, M, O, A +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B with TreePLRU replacement policy, +# you will observe: m, m, m, m, h, m, m, m, m, m, +# where 'm' means miss, and 'h' means hit. + +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# and A is the next one to get replaced. +# A hits +# I searches for a victim and selects E. Now the cache stores ([A, C*, I, G],[ , , ,]). +# K searches for a victim and selects C. Now the cache stores ([A, K, I, G*],[ , , ,]). +# M searches for a victim and selects G. Now the cache stores ([A*, K, I, M],[ , , ,]). +# O searches for a victim and selects A. Now the cache stores ([O, K, I*, M],[ , , ,]). +# A searches for a victim and selects I. Now the cache stores ([O, K*, A, M],[ , , ,]). 
+ +from m5.objects.ReplacementPolicies import TreePLRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test1_st.py b/tests/gem5/replacement-policies/traces/tree_plru_test1_st.py new file mode 100644 index 0000000000..8f677bef0a --- /dev/null +++ b/tests/gem5/replacement-policies/traces/tree_plru_test1_st.py @@ -0,0 +1,65 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, A, I, K, M, O, A +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B with TreePLRU replacement policy, +# you will observe: m, m, m, m, h, m, m, m, m, m, +# where 'm' means miss, and 'h' means hit. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# and A is the next one to get replaced. +# A hits +# I searches for a victim and selects E. Now the cache stores ([A, C*, I, G],[ , , ,]). +# K searches for a victim and selects C. Now the cache stores ([A, K, I, G*],[ , , ,]). +# M searches for a victim and selects G. Now the cache stores ([A*, K, I, M],[ , , ,]). +# O searches for a victim and selects A. Now the cache stores ([O, K, I*, M],[ , , ,]). +# A searches for a victim and selects I. Now the cache stores ([O, K*, A, M],[ , , ,]). + +from m5.objects.ReplacementPolicies import TreePLRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test2_ld.py b/tests/gem5/replacement-policies/traces/tree_plru_test2_ld.py new file mode 100644 index 0000000000..6793cbee9d --- /dev/null +++ b/tests/gem5/replacement-policies/traces/tree_plru_test2_ld.py @@ -0,0 +1,61 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, E, I, K, E +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B with TreePLRU replacement policy, +# you will observe: m, m, m, m, h, m, m, h, +# where 'm' means miss, and 'h' means hit. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# and A is the next one to get replaced. +# E hits +# I searches for a victim and selects A. Now the cache stores ([I, C, E, G*],[ , , ,]). +# K searches for a victim and selects G. Now the cache stores ([I, C*, E, K],[ , , ,]). +# E hits + +from m5.objects.ReplacementPolicies import TreePLRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test2_st.py b/tests/gem5/replacement-policies/traces/tree_plru_test2_st.py new file mode 100644 index 0000000000..ea4332897c --- /dev/null +++ b/tests/gem5/replacement-policies/traces/tree_plru_test2_st.py @@ -0,0 +1,61 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, E, I, K, E +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B with TreePLRU replacement policy, +# you will observe: m, m, m, m, h, m, m, h, +# where 'm' means miss, and 'h' means hit. 
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# and A is the next one to get replaced. +# E hits +# I searches for a victim and selects A. Now the cache stores ([I, C, E, G*],[ , , ,]). +# K searches for a victim and selects G. Now the cache stores ([I, C*, E, K],[ , , ,]). +# E hits + +from m5.objects.ReplacementPolicies import TreePLRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test3_ld.py b/tests/gem5/replacement-policies/traces/tree_plru_test3_ld.py new file mode 100644 index 0000000000..f358bb0ce7 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/tree_plru_test3_ld.py @@ -0,0 +1,67 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting loads. +# Access pattern: A, C, E, G, E, I, K, C, E, A, C +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B with TreePLRU replacement policy, +# you will observe: m, m, m, m, h, m, m, h, h, m, h, +# where 'm' means miss, and 'h' means hit.
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# and A is the next one to get replaced. +# E hits +# I searches for a victim and selects A. Now the cache stores ([I, C, E, G*],[ , , ,]). +# K searches for a victim and selects G. Now the cache stores ([I, C*, E, K],[ , , ,]). +# C hits. Now the cache stores ([I, C, E*, K],[ , , ,]). +# E hits. Now the cache stores ([I*, C, E, K],[ , , ,]). +# A searches for a victim and selects I. Now the cache stores ([A, C, E, K*],[ , , ,]). +# C hits + +from m5.objects.ReplacementPolicies import TreePLRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0) + + yield generator.createExit(0) diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test3_st.py b/tests/gem5/replacement-policies/traces/tree_plru_test3_st.py new file mode 100644 index 0000000000..0b689af492 --- /dev/null +++ b/tests/gem5/replacement-policies/traces/tree_plru_test3_st.py @@ -0,0 +1,67 @@ +# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This test is targeting stores +# Access pattern: A, C, E, G, E, I, K, C, E, A, C +# Each letter represents a 64-byte address range. + +# The [] indicate two different sets, and each set has four ways. +# [set0way0, set0way1, set0way2, set0way3], +# [set1way0, set1way1, set1way2, set1way3], +# If you have a 512B cache with 4-way associativity, +# and each cache line is 64B with TreePLRU replacement policy, +# you will observe: m, m, m, m, h, m, m, h, h, m, h, +# where 'm' means miss, and 'h' means hit.
+ +# Explanation of this result: +# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]). +# and A is the next one to get replaced. +# E hits +# I searches for a victim and selects A. Now the cache stores ([I, C, E, G*],[ , , ,]). +# K searches for a victim and selects G. Now the cache stores ([I, C*, E, K],[ , , ,]). +# C hits. Now the cache stores ([I, C, E*, K],[ , , ,]). +# E hits. Now the cache stores ([I*, C, E, K],[ , , ,]). +# A searches for a victim and selects I. Now the cache stores ([A, C, E, K*],[ , , ,]). +# C hits + +from m5.objects.ReplacementPolicies import TreePLRURP as rp + + +def python_generator(generator): + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0) + yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0) + yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0) + + yield generator.createExit(0) From 1b2252cbc0dbd2473f0e106775419dd8b87992f9 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Tue, 6 Dec 2022 14:13:40 -0800 Subject: [PATCH 048/492] misc: Update .mailmap This commit updates the mailmap since the initial commit in mid-July 2020: https://gem5-review.googlesource.com/c/public/gem5/+/29672. `sort -u` has been run on this file so some previous entries have been moved. 
Change-Id: I46df1e9675f6f7057b680ca2abbcebdffd50462a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66517 Tested-by: kokoro Maintainer: Bobby Bruce Reviewed-by: Jason Lowe-Power --- .mailmap | 221 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 160 insertions(+), 61 deletions(-) diff --git a/.mailmap b/.mailmap index 5125666bbc..49c438d3eb 100644 --- a/.mailmap +++ b/.mailmap @@ -1,37 +1,43 @@ -ARM gem5 Developers Abdul Mutaal Ahmad +adarshpatil +Adrià Armejach Adrià Armejach Adrian Herrera Adrien Pesle -Adrià Armejach Adrià Armejach Akash Bagdia Akash Bagdia Alec Roelke Alec Roelke +Alexander Klimov Alexandru Dutu Alexandru +Alex Richardson Ali Jafri -Ali Saidi Ali Saidi Ali Saidi Ali Saidi +Ali Saidi Ali Saidi Ali Saidi Ali Saidi +Alistair Delva Amin Farmahini Anders Handler -Andrea Mondelli Andrea Mondelli +Andrea Mondelli Andrea Mondelli +Andrea Mondelli Andrea Mondelli Andrea Pellegrini -Andreas Hansson Andreas Hansson Andreas Hansson Andreas Hansson Andreas Hansson Andreas Hansson +Andreas Hansson Andreas Hansson Andreas Hansson Andreas Hansson -Andreas Sandberg Andreas Sandberg Andreas Sandberg Andreas Sandberg +Andreas Sandberg Andreas Sandberg Andreas Sandberg Andreas Sandberg Andrew Bardsley Andrew Bardsley Andrew Lukefahr Andrew Schultz Andriani Mappoura -Ani Udipi +Angie Lee Anis Peysieux +Ani Udipi Anouk Van Laer -Arthur Perais +ARM gem5 Developers +Arthur Perais Arthur Perais +Arun Rodrigues Ashkan Tousi -Austin Harris -Richard D. Strong +Austin Harris Austin Harris Avishai Tvila Ayaz Akram Bagus Hanindhito @@ -41,80 +47,108 @@ Binh Pham Bjoern A. Zeeb Blake Hechtman Blake Hechtman Blake Hechtman Blake Hechtman ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) -Bobby R. Bruce +Bobby R. 
Bruce Bobby Bruce Boris Shingarov Boris Shingarov Brad Beckmann Brad Beckmann Brad Beckmann Brad Beckmann ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) Brad Danofsky Bradley Wang Bradley +Brandon Potter BKP Brandon Potter bpotter Brandon Potter Brandon Potter -Brandon Potter BKP Brian Grayson Cagdas Dirik cdirik +Carlos Falquez Chander Sudanthi Chander Sudanthi Chander Sudanthi Chander Sudanthi +Charles Jamieson +CHEN Meng Chen Zou +Chia-You Chen +Chow, Marcus Chris Adeniyi-Jones -Chris Emmons Chris Emmons Chris Emmons Chris Emmons +Chris Emmons Chris Emmons +Chris January Christian Menard Christian Menard -Christoph Pfister Christopher Torng +Christoph Pfister Chuan Zhu Chun-Chen Hsu Chun-Chen TK Hsu Ciro Santilli Clint Smullen +Cui Jin Cui Jin Curtis Dunham +Daecheol You Dam Sunwoo Dan Gibson Daniel Carvalho Daniel Daniel Carvalho Daniel R. Carvalho +Daniel Gerzhoy Daniel Johnson Daniel Sanchez +Davide Basilio Bartolini David Guillen-Fandos David Guillen David Guillen-Fandos David Guillen Fandos David Hashe David Hashe David Oehmke +David Schall +Derek Christ Derek Hower -Deyaun Guo Deyuan Guo Deyaun Guo Deyuan Guo ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) +Deyaun Guo Deyuan Guo Dibakar Gope Dibakar Gope ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) +Dimitrios Chasapis Djordje Kovacevic Djordje Kovacevic -Dongxue Zhang Doğukan Korkmaztürk +Dongxue Zhang Dylan Johnson Earl Ou +eavivi +Éder F. 
Zulian Edmund Grimley Evans +Eduardo José Gómez Hernández +Eliot Moss Emilio Castillo Emilio Castillo Emilio Castillo Emilio Castillo ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) +Emily Brickey Erfan Azarkhish +Erhu Eric Van Hensbergen Eric Van Hensbergen +Eric Ye Erik Hallnor Erik Tomusk Faissal Sleiman Faissal Sleiman Fernando Endo +Franklin He Gabe Black Gabe Black Gabe Black Gabe Black +Gabe Loh gloh Gabor Dozsa +Gabriel Busnot +gauravjain14 Gedare Bloom Gedare Bloom Gene Wu Gene WU Gene WU Gene Wu -Geoffrey Blake Geoffrey Blake Geoffrey Blake Geoffrey Blake +Geoffrey Blake Geoffrey Blake Georg Kotheimer Giacomo Gabrielli Giacomo Gabrielli Giacomo Travaglini Glenn Bergmans +GWDx Hamid Reza Khaleghzadeh Hamid Reza Khaleghzadeh ext:(%2C%20Lluc%20Alvarez%20%3Clluc.alvarez%40bsc.es%3E%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) +handsomeliu Hanhwi Jang Hoa Nguyen Hongil Yoon Hsuan Hsu +huangjs Hussein Elnawawy Ian Jiang IanJiangICT Ilias Vougioukas +Iru Cai Isaac Richter Isaac Sánchez Barrera Ivan Pizarro @@ -123,104 +157,152 @@ Jairo Balart Jakub Jermar James Clarkson Jan-Peter Larsson -Jason Lowe-Power Jason Lowe-Power +Jan Vrany +Jarvis Jia +Jasjeet Rangi Jason Lowe-Power Jason Lowe-Power -Jason Lowe-Power Jason Power -Jason Lowe-Power Jason Power +Jason Lowe-Power Jason Lowe-Power Jason Lowe-Power Jason Power ext:(%2C%20Joel%20Hestness%20%3Chestness%40cs.wisc.edu%3E) +Jason Lowe-Power Jason Power +Jason Lowe-Power Jason Power +Jason Yu Javier Bueno Hedo Javier Bueno Javier Cano-Cano +Javier Garcia Hernandez Javier Setoain Jayneel Gandhi Jennifer Treichler -Jieming Yin +Jerin Joy +Jiajie Chen +Jiasen Huang +Jiasen +Jiayi Huang +jiegec +Jieming Yin jiemingyin Jing Qu JingQuJQ Jiuyue Ma Joe Gross Joe Gross +Joel Hestness Joel Hestness ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) Joel Hestness Joel Hestness Joel Hestness Joel Hestness -Joel Hestness Joel Hestness ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) +Joël Porquet-Lupine John 
Alsop John Kalamatianos jkalamat Jordi Vaquero Jose Marinho +Juan M. Cebrian Jui-min Lee +kai.ren Kai Ren Kanishk Sugand Karthik Sangaiah +Kaustav Goswami +Kelly Nguyen Ke Meng Kevin Brodsky Kevin Lim +Kevin Loughlin Khalique Koan-Sin Tan Korey Sewell Krishnendra Nathella Krishnendra Nathella +ksco +kunpai +Kyle Roarty Kyle Roarty +Laura Hinman Lena Olson Lena Olson Lena Olson Lena Olson Lisa Hsu Lisa Hsu Lluc Alvarez Lluís Vilanova Lluis Vilanova +Lukas Steiner +Luming Wang +m5test Mahyar Samani +Majid Jalili Malek Musleh Nilay Vaish ext:(%2C%20Malek%20Musleh%20%3Cmalek.musleh%40gmail.com%3E) Marc Mari Barcelo -Marc Orr Marc Orr Marco Balboni Marco Elver Marco Elver +Marc Orr Marc Orr +Marjan Fariborz marjanfariborz +Mark Hildebrand +Marton Erdos +Maryam Babaie Matt DeVuyst -Matt Evans Matt Evans -Matt Horsnell Matt Horsnell -Matt Horsnell Matt Horsnell -Matt Horsnell Matt Horsnell -Matt Poremba Matt Poremba Matteo Andreozzi Matteo Andreozzi Matteo M. Fusi +Matt Evans Matt Evans Matthew Poremba Matthew Poremba -Matt Sinclair Matthew Sinclair Matthias Hille Matthias Jung +Matthias Jung +Matt Horsnell Matt Horsnell +Matt Horsnell Matt Horsnell +Matt Horsnell Matt Horsnell +Matt Poremba Matt Poremba +Matt Sinclair Matthew Sinclair +Matt Sinclair Matt Sinclair Maurice Becker Maxime Martinasso -Maximilian Stein +Maximilian Stein Maximilian Stein Maximilien Breughe Maximilien Breughe +Melissa Jost Michael Adler +Michael Boyer Michael LeBeane Michael LeBeane Michael LeBeane mlebeane Michael Levenhagen -Michiel Van Tol Michiel W. van Tol Michiel Van Tol Michiel van Tol +Michiel Van Tol Michiel W. 
van Tol Miguel Serrano +Mike Upton Miles Kaufmann -Min Kyu Jeong Min Kyu Jeong Mingyuan -Mitch Hayenga Mitch Hayenga -Mitch Hayenga Mitch Hayenga -Mitch Hayenga Mitch Hayenga -Mitch Hayenga Mitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) +Min Kyu Jeong Min Kyu Jeong Mitch Hayenga Mitchell Hayenga +Mitch Hayenga Mitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) +Mitch Hayenga Mitch Hayenga +Mitch Hayenga Mitch Hayenga +Mitch Hayenga Mitch Hayenga Mohammad Alian Monir Mozumder Moyang Wang Mrinmoy Ghosh Mrinmoy Ghosh -Nathan Binkert Nathan Binkert +Muhammad Sarmad Saeed +Nadia Etemadi Nathanael Premillieu Nathanael Premillieu +Nathanael Premillieu Nathanael Premillieu Nathanael Premillieu Nathanael Premillieu Nathanael Premillieu Nathanael Premillieu Nathanael Premillieu Nathanael Premillieu +Nathan Binkert Nathan Binkert Nayan Deshmukh Neha Agarwal +Neil Natekar Nicholas Lindsay +Nicolas Boichat Nicolas Derumigny Nicolas Zea Nikos Nikoleris Nikos Nikoleris +Nilay Vaish ext:(%2C%20Timothy%20Jones%20%3Ctimothy.jones%40cl.cam.ac.uk%3E) Nils Asmussen Nils Asmussen +Noah Katz +ntampouratzis Nuwan Jayasena Ola Jeppsson Omar Naji +Onur Kayiran Pablo Prieto +paikunal Palle Lyckegaard Pau Cabre Paul Rosenfeld Paul Rosenfeld Paul Rosenfeld Paul Rosenfeld Peter Enns Pierre-Yves Péneau +Peter +Peter Yuen +Philip Metzler +Pierre Ayoub Pin-Yen Lin Po-Hao Su Polina Dudnik Polina Dudnik @@ -229,23 +311,26 @@ Pouya Fotouhi Pouya Fotouhi Prakash Ramrakhyani Prakash Ramrakhani Prakash Ramrakhyani Prakash Ramrakhyani Pritha Ghoshal +Quentin Forcioli Radhika Jagtap Radhika Jagtap Rahul Thakur Reiley Jeapaul -Rekai Gonzalez-Alberquilla Rekai -Rekai Gonzalez-Alberquilla Rekai Gonzalez Alberquilla Rekai Gonzalez-Alberquilla Rekai Gonzalez Alberquilla +Rekai Gonzalez-Alberquilla Rekai Gonzalez Alberquilla Rekai Gonzalez-Alberquilla Rekai Gonzalez-Alberquilla +Rekai Gonzalez-Alberquilla Rekai Rene de Jong Ricardo Alves +Richard Cooper +Richard D. 
Strong Richard Strong Richard Strong Richard Strong Richard Strong Richard Strong Rick Strong Rico Amslinger Riken Gohil Rizwana Begum -Robert Scheffel Robert Robert Kovacsics +Robert Scheffel Robert Rohit Kurup Ron Dreslinski Ronald Dreslinski Ruben Ayrapetyan @@ -253,20 +338,27 @@ Rune Holm Ruslan Bukin Ruslan Bukin ext:(%2C%20Zhang%20Guoye) Rutuja Oza Ryan Gambord +sacak32 +Sampad Mohapatra Samuel Grayson -Sandipan Das +Samuel Stark +Sandipan Das <31861871+sandip4n@users.noreply.github.com> +Sandipan Das Sandipan Das <31861871+sandip4n@users.noreply.github.com> Santi Galan -Sascha Bischoff Sascha Bischoff Sascha Bischoff Sascha Bischoff +Sascha Bischoff Sascha Bischoff Sean McGoogan Sean Wilson Sergei Trofimov Severin Wischmann Severin Wischmann ext:(%2C%20Ioannis%20Ilkos%20%3Cioannis.ilkos09%40imperial.ac.uk%3E) Shawn Rosti Sherif Elhabbal +Shivani Parekh +Shivani Siddhesh Poyarekar Somayeh Sardashti Sooraj Puthoor +Sooraj Puthoor Sophiane Senni Soumyaroop Roy Srikant Bharadwaj @@ -275,13 +367,14 @@ Stanislaw Czerniawski Stephan Diestelhorst Stephan Diestelhorst Stephen Hines Steve Raasch -Steve Reinhardt Steve Reinhardt -Steve Reinhardt Steve Reinhardt -Steve Reinhardt Steve Reinhardt Steve Reinhardt Steve Reinhardt ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E%2C%20Ali%20Saidi%20%3CAli.Saidi%40ARM.com%3E) +Steve Reinhardt Steve Reinhardt +Steve Reinhardt Steve Reinhardt +Steve Reinhardt Steve Reinhardt Stian Hvatum Sudhanshu Jha Sujay Phadke +Sungkeun Kim Swapnil Haria Swapnil Haria Taeho Kgil Tao Zhang @@ -290,44 +383,50 @@ Tiago Mück Tiago Muck Tim Harris Timothy Hayes Timothy M. Jones Timothy Jones -Timothy M. Jones Nilay Vaish ext:(%2C%20Timothy%20Jones%20%3Ctimothy.jones%40cl.cam.ac.uk%3E) Timothy M. Jones Timothy M. Jones Timothy M. Jones Timothy M. 
Jones Tom Jablin Tommaso Marinelli +Tom Rollet +Tong Shen Tony Gutierrez Anthony Gutierrez -Tuan Ta Tuan Ta -Tushar Krishna Tushar Krishna +Travis Boraten +Trivikram Reddy tv-reddy +Tuan Ta Tuan Ta Tuan Ta Tushar Krishna Tushar Krishna +Tushar Krishna Tushar Krishna Umesh Bhaskar Uri Wiener Victor Garcia Vilas Sridharan -Vince Weaver Vincentius Robby +Vince Weaver +vramadas95 +vsoria Wade Walker +Wei-Han Chen Weiping Liao +Wende Tan Wendy Elsasser -William Wang William Wang William Wang William Wang +William Wang William Wang Willy Wolff +Wing Li Xiangyu Dong -Xianwei Zhang +Xianwei Zhang Xianwei Zhang Xiaoyu Ma Xin Ouyang +Xiongfei Yasuko Eckert -Yi Xiang +Yen-lin Lai Yifei Liu -Yu-hsin Wang +yiwkd2 +Yi Xiang Yuan Yao Yuetsu Kodama yuetsu.kodama +Yu-hsin Wang Zhang Zheng +Zhantong Qiu +Zhengrong Wang seanzw +zhongchengyong Zicong Wang -Éder F. Zulian -Gabe Loh gloh -jiegec -m5test -Marjan Fariborz marjanfariborz -Mike Upton -seanzw -Trivikram Reddy tv-reddy From 91f8f2b276bf0a4ef03963822dbcbadd6a657d2e Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Fri, 2 Dec 2022 11:15:45 -0800 Subject: [PATCH 049/492] tests: Add missing `_pre_instantiate()` As of this change: https://gem5-review.googlesource.com/c/public/gem5/+/65051, the `_pre_instantiate` function must be called prior to `m5.instantiate` when using the stdlib without the Simulator module. The "test/gem5/replacement-policies/run_replacement_policy_test.py" lacked this and was causing errors when running replacement policy tests. In addition we also fix the incorrect type of size in`createArtifact'. This was causing problems with the Kokoro build system. The typing here was `int` but had a default value of `None`. The correct type is therefore `Optional[int]`. 
Change-Id: Ibaf63151196b15f68e643fa5c1b290439d6618c8 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66371 Maintainer: Bobby Bruce Tested-by: kokoro Reviewed-by: Bobby Bruce (cherry picked from commit a3fd9631cc209914fad2e2c1fb24006d6d5adc2d) Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66613 --- tests/gem5/replacement-policies/run_replacement_policy_test.py | 1 + util/gem5art/artifact/gem5art/artifact/artifact.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/gem5/replacement-policies/run_replacement_policy_test.py b/tests/gem5/replacement-policies/run_replacement_policy_test.py index 10061094b4..31076c6d99 100644 --- a/tests/gem5/replacement-policies/run_replacement_policy_test.py +++ b/tests/gem5/replacement-policies/run_replacement_policy_test.py @@ -85,6 +85,7 @@ motherboard = TestBoard( ) root = Root(full_system=False, system=motherboard) +motherboard._pre_instantiate() m5.instantiate() generator.start_traffic() diff --git a/util/gem5art/artifact/gem5art/artifact/artifact.py b/util/gem5art/artifact/gem5art/artifact/artifact.py index 91ffc64e50..46664e82fb 100644 --- a/util/gem5art/artifact/gem5art/artifact/artifact.py +++ b/util/gem5art/artifact/gem5art/artifact/artifact.py @@ -158,7 +158,7 @@ class Artifact: documentation: str, inputs: List["Artifact"] = [], architecture: str = "", - size: int = None, + size: Optional[int] = None, is_zipped: bool = False, md5sum: str = "", url: str = "", From ce03482a394fd4309104ca15d002d3070fac3aef Mon Sep 17 00:00:00 2001 From: Yu-hsin Wang Date: Thu, 8 Dec 2022 17:03:49 +0800 Subject: [PATCH 050/492] mem: Implement and use the recvMemBackdoorReq func. In the previous change, we miss some model supporting atomic backdoor. We should also implement the recvMemBackdoorReq to them. 
Change-Id: I4706d215aa4a5d18fe4306b2387f9c8750cb4b4a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66551 Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- src/mem/hbm_ctrl.cc | 15 +++++++++++++++ src/mem/hbm_ctrl.hh | 2 ++ src/mem/thread_bridge.cc | 8 ++++++++ src/mem/thread_bridge.hh | 2 ++ 4 files changed, 27 insertions(+) diff --git a/src/mem/hbm_ctrl.cc b/src/mem/hbm_ctrl.cc index 99618c4b5f..747e714f57 100644 --- a/src/mem/hbm_ctrl.cc +++ b/src/mem/hbm_ctrl.cc @@ -150,6 +150,21 @@ HBMCtrl::recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor) return latency; } +void +HBMCtrl::recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) +{ + auto &range = req.range(); + if (pc0Int && pc0Int->getAddrRange().isSubset(range)) { + pc0Int->getBackdoor(backdoor); + } else if (pc1Int && pc1Int->getAddrRange().isSubset(range)) { + pc1Int->getBackdoor(backdoor); + } + else { + panic("Can't handle address range for range %s\n", range.to_string()); + } +} + bool HBMCtrl::writeQueueFullPC0(unsigned int neededEntries) const { diff --git a/src/mem/hbm_ctrl.hh b/src/mem/hbm_ctrl.hh index c9045f0ae7..a6ecf6c589 100644 --- a/src/mem/hbm_ctrl.hh +++ b/src/mem/hbm_ctrl.hh @@ -259,6 +259,8 @@ class HBMCtrl : public MemCtrl Tick recvAtomic(PacketPtr pkt) override; Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor) override; void recvFunctional(PacketPtr pkt) override; + void recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &_backdoor) override; bool recvTimingReq(PacketPtr pkt) override; }; diff --git a/src/mem/thread_bridge.cc b/src/mem/thread_bridge.cc index 3f76ef49b3..efaf19a0e2 100644 --- a/src/mem/thread_bridge.cc +++ b/src/mem/thread_bridge.cc @@ -84,6 +84,14 @@ ThreadBridge::IncomingPort::recvFunctional(PacketPtr pkt) device_.out_port_.sendFunctional(pkt); } +void +ThreadBridge::IncomingPort::recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) +{ + 
EventQueue::ScopedMigration migrate(device_.eventQueue()); + device_.out_port_.sendMemBackdoorReq(req, backdoor); +} + ThreadBridge::OutgoingPort::OutgoingPort(const std::string &name, ThreadBridge &device) : RequestPort(name, &device), device_(device) diff --git a/src/mem/thread_bridge.hh b/src/mem/thread_bridge.hh index 28c959193c..92cb078dd1 100644 --- a/src/mem/thread_bridge.hh +++ b/src/mem/thread_bridge.hh @@ -61,6 +61,8 @@ class ThreadBridge : public SimObject // FunctionalResponseProtocol void recvFunctional(PacketPtr pkt) override; + void recvMemBackdoorReq(const MemBackdoorReq &req, + MemBackdoorPtr &backdoor) override; private: ThreadBridge &device_; From d65173d596cee8f62fa25b41c78ab07dcf18cf72 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Sat, 10 Dec 2022 15:56:02 -0800 Subject: [PATCH 051/492] tests: Move replacement policy tests to long/Nightly These tests require the compilation of NULL with the MI_Example cache coherence protocol. This is a large overhead for these tests. They are therefore better off being run nightly rather than as a pre-commit/kokoro/quick test. 
Change-Id: I87b25afa313ecca65c738e3a8692a9bf72b06620 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66615 Reviewed-by: Matt Sinclair Tested-by: kokoro Maintainer: Matt Sinclair --- tests/gem5/replacement-policies/test_replacement_policies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gem5/replacement-policies/test_replacement_policies.py b/tests/gem5/replacement-policies/test_replacement_policies.py index 3a30c0a070..7b00e10429 100644 --- a/tests/gem5/replacement-policies/test_replacement_policies.py +++ b/tests/gem5/replacement-policies/test_replacement_policies.py @@ -51,7 +51,7 @@ def test_replacement_policy(config_name: str, config_path: str) -> None: valid_isas=(constants.null_tag,), protocol="MI_example", valid_hosts=constants.supported_hosts, - length=constants.quick_tag, + length=constants.long_tag, ) From fa34ebc8535c682717f6dc55649d41b0f16b9762 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Tue, 29 Nov 2022 16:18:22 +0800 Subject: [PATCH 052/492] arch-riscv: Fork ACDFIMU_Zfh instructions into rv32/rv64 1. Add rvSelect for rv32 and rv64. 2. Add rvZext and rvSext for rv32 handle sign extension 3. 
Fork the following instructions into rv32/rv64 version A extensions: SC.W LR.D SC.D AMOADD.D AMOSWAP.D AMOXOR.D AMOOR.D AMOAND.D AMOMIN.D AMOMAX.D AMOMINU.D AMOMAXU.D C extensions: C.ADDI4SPN C.FLD C.LW C.FLW C.LD C.FSD C.SW C.FSW C.SD C.ADDI C.JAL C.ADDIW C.ADDI16SP C.SRLI C.SRAI C.ANDI C.SUB C.XOR C.OR C.AND C.SUBW C.ADDW C.J C.BEQZ C.BNEZ C.SLLI C.FLDSP C.LWSP C.FLWSP C.LDSP C.JR C.MV C.EBREAK C.JALR C.ADD C.FSDSP C.SWSP C.FSWSP C.SDSPF D extensions: FCVT.L.D FCVT.LU.D FCVT.D.L FCVT.D.LU FMV.X.D FCLASS.D FMV.D.X F extensions: FSW FCVT.L.S FCVT.LU.S FCVT.S.W FCVT.S.WU FCVT.S.L FCVT.S.LU FMV.X.W FCLASS.S FMV.W.X I extensions: LD LWU SLLI ADDI SLTI SLTIU XORI SRLI SRAI ORI ANDI AUIPC ADDIW SLLIW SRLIW SRAIW SD ADD SUB SLL SLT SLTU XOR SRL SRA OR AND LUI BEQ BNE BLT BGE BLTU BGEU JALR JAL ADDW SUBW SLLW SRLW SRAW M extensions: MUL MULH MULHSU MULHU DIV DIVU REM REMU MULW DIVW DIVUW REMW REMUW ZFH extensions: FSH FCVT.L.H FCVT.LU.H FCVT.H.L FCVT.H.LU Change-Id: I8604324eadb700591db028aa3b013b060ba37de5 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65111 Reviewed-by: Jason Lowe-Power Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- src/arch/riscv/insts/static_inst.hh | 12 + src/arch/riscv/isa/bitfields.isa | 2 + src/arch/riscv/isa/decoder.isa | 1084 ++++++++++++++++----------- src/arch/riscv/isa/formats/amo.isa | 10 +- src/arch/riscv/isa/formats/mem.isa | 4 +- 5 files changed, 681 insertions(+), 431 deletions(-) diff --git a/src/arch/riscv/insts/static_inst.hh b/src/arch/riscv/insts/static_inst.hh index bccecf2e2f..f835713505 100644 --- a/src/arch/riscv/insts/static_inst.hh +++ b/src/arch/riscv/insts/static_inst.hh @@ -58,6 +58,18 @@ class RiscvStaticInst : public StaticInst bool alignmentOk(ExecContext* xc, Addr addr, Addr size) const; + template + T + rvSelect(T v32, T v64) const + { + return (machInst.rv_type == RV32) ? 
v32 : v64; + } + + template + T64 rvExt(T64 x) const { return rvSelect((T64)(T32)x, x); } + uint64_t rvZext(uint64_t x) const { return rvExt(x); } + int64_t rvSext(int64_t x) const { return rvExt(x); } + public: ExtMachInst machInst; diff --git a/src/arch/riscv/isa/bitfields.isa b/src/arch/riscv/isa/bitfields.isa index 41935c5b0f..863982cfec 100644 --- a/src/arch/riscv/isa/bitfields.isa +++ b/src/arch/riscv/isa/bitfields.isa @@ -3,6 +3,7 @@ // Copyright (c) 2015 RISC-V Foundation // Copyright (c) 2016 The University of Virginia // Copyright (c) 2020 Barkhausen Institut +// Copyright (c) 2022 Google LLC // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -49,6 +50,7 @@ def bitfield FUNCT7 <31:25>; def bitfield SRTYPE <30>; def bitfield SHAMT5 <24:20>; def bitfield SHAMT6 <25:20>; +def bitfield SHAMT6BIT5 <25>; // I-Type def bitfield IMM12 <31:20>; diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index c6b74ff44f..d442002bd5 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -4,6 +4,7 @@ // Copyright (c) 2017 The University of Virginia // Copyright (c) 2020 Barkhausen Institut // Copyright (c) 2021 StreamComputing Corp +// Copyright (c) 2022 Google LLC // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,6 +35,10 @@ // The RISC-V ISA decoder // +// In theory, all registers should be sign extended if not operating in the +// full MXLEN register, but that will cause memory address out of range as it is +// always regarded as uint64. So we'll zero extend PC related registers and +// memory address, and sign extend others. 
decode QUADRANT default Unknown::unknown() { 0x0: decode COPCODE { 0x0: CIAddi4spnOp::c_addi4spn({{ @@ -45,7 +50,7 @@ decode QUADRANT default Unknown::unknown() { if (machInst == 0) return std::make_shared("zero instruction", machInst); - Rp2 = sp + imm; + Rp2 = rvSext(sp + imm); }}, uint64_t); format CompressedLoad { 0x1: c_fld({{ @@ -58,7 +63,7 @@ decode QUADRANT default Unknown::unknown() { Fp2_bits = Mem; }}, {{ - EA = Rp1 + offset; + EA = rvZext(Rp1 + offset); }}); 0x2: c_lw({{ offset = CIMM2<1:1> << 2 | @@ -67,15 +72,32 @@ decode QUADRANT default Unknown::unknown() { }}, {{ Rp2_sd = Mem_sw; }}, {{ - EA = Rp1 + offset; - }}); - 0x3: c_ld({{ - offset = CIMM3 << 3 | CIMM2 << 6; - }}, {{ - Rp2_sd = Mem_sd; - }}, {{ - EA = Rp1 + offset; + EA = rvZext(Rp1 + offset); }}); + 0x3: decode RVTYPE { + 0x0: c_flw({{ + offset = CIMM2<1:1> << 2 | + CIMM3 << 3 | + CIMM2<0:0> << 6; + }}, {{ + STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (status.fs == FPUStatus::OFF) + return std::make_shared("FPU is off", + machInst); + + freg_t fd = freg(f32(Mem_uw)); + Fp2_bits = fd.v; + }}, {{ + EA = (uint32_t)(Rp1_uw + offset); + }}); + 0x1: c_ld({{ + offset = CIMM3 << 3 | CIMM2 << 6; + }}, {{ + Rp2_sd = Mem_sd; + }}, {{ + EA = Rp1 + offset; + }}); + } } format CompressedStore { 0x5: c_fsd({{ @@ -88,7 +110,7 @@ decode QUADRANT default Unknown::unknown() { Mem = Fp2_bits; }}, {{ - EA = Rp1 + offset; + EA = rvZext(Rp1 + offset); }}); 0x6: c_sw({{ offset = CIMM2<1:1> << 2 | @@ -97,15 +119,31 @@ decode QUADRANT default Unknown::unknown() { }}, {{ Mem_uw = Rp2_uw; }}, ea_code={{ - EA = Rp1 + offset; + EA = rvZext(Rp1 + offset); }}); - 0x7: c_sd({{ - offset = CIMM3 << 3 | CIMM2 << 6; - }}, {{ + 0x7: decode RVTYPE { + 0x0: c_fsw({{ + offset = CIMM2<1:1> << 2 | + CIMM3 << 3 | + CIMM2<0:0> << 6; + }}, {{ + STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (status.fs == FPUStatus::OFF) + return std::make_shared("FPU is off", + machInst); + + Mem_uw = unboxF32(boxF32(Fs2_bits)); + }}, 
{{ + EA = (uint32_t)(Rp1_uw + offset); + }}); + 0x1: c_sd({{ + offset = CIMM3 << 3 | CIMM2 << 6; + }}, {{ Mem_ud = Rp2_ud; - }}, {{ - EA = Rp1 + offset; - }}); + }}, {{ + EA = Rp1 + offset; + }}); + } } } 0x1: decode COPCODE { @@ -124,19 +162,34 @@ decode QUADRANT default Unknown::unknown() { "immediate = 0", machInst); } } - Rc1_sd = Rc1_sd + imm; - }}); - 0x1: c_addiw({{ - imm = CIMM5; - if (CIMM1 > 0) - imm |= ~((uint64_t)0x1F); - }}, {{ - if (RC1 == 0) { - return std::make_shared( - "source reg x0", machInst); - } - Rc1_sw = (int32_t)(Rc1_sw + imm); + Rc1_sd = rvSext(Rc1_sd + imm); }}); + 0x1: decode RVTYPE { + 0x0: c_jal({{ + imm = sext<12>((CJUMPIMM3TO1 << 1) | + (CJUMPIMM4TO4 << 4) | + (CJUMPIMM5TO5 << 5) | + (CJUMPIMM6TO6 << 6) | + (CJUMPIMM7TO7 << 7) | + (CJUMPIMM9TO8 << 8) | + (CJUMPIMM10TO10 << 10) | + (CJUMPIMMSIGN << 11)); + }}, {{ + ra_sw = NPC_uw; + NPC_uw = PC_uw + imm; + }}); + 0x1: c_addiw({{ + imm = CIMM5; + if (CIMM1 > 0) + imm |= ~((uint64_t)0x1F); + }}, {{ + if (RC1 == 0) { + return std::make_shared( + "source reg x0", machInst); + } + Rc1_sw = (int32_t)(Rc1_sw + imm); + }}); + } 0x2: c_li({{ imm = CIMM5; if (CIMM1 > 0) @@ -161,7 +214,7 @@ decode QUADRANT default Unknown::unknown() { return std::make_shared( "immediate = 0", machInst); } - sp_sd = sp_sd + imm; + sp_sd = rvSext(sp_sd + imm); }}); default: c_lui({{ imm = CIMM5 << 12; @@ -185,69 +238,80 @@ decode QUADRANT default Unknown::unknown() { 0x0: c_srli({{ imm = CIMM5 | (CIMM1 << 5); }}, {{ + if (rvSelect((bool)CIMM1, false)) { + return std::make_shared( + "shmat[5] != 0", machInst); + } if (imm == 0) { return std::make_shared( "immediate = 0", machInst); } - Rp1 = Rp1 >> imm; + // The MSB can never be 1, hence no need to sign ext. 
+ Rp1 = rvZext(Rp1) >> imm; }}, uint64_t); 0x1: c_srai({{ imm = CIMM5 | (CIMM1 << 5); }}, {{ + if (rvSelect((bool)CIMM1, false)) { + return std::make_shared( + "shmat[5] != 0", machInst); + } if (imm == 0) { return std::make_shared( "immediate = 0", machInst); } - Rp1_sd = Rp1_sd >> imm; + Rp1_sd = rvSext(Rp1_sd) >> imm; }}, uint64_t); 0x2: c_andi({{ imm = CIMM5; if (CIMM1 > 0) imm |= ~((uint64_t)0x1F); }}, {{ - Rp1 = Rp1 & imm; + Rp1 = rvSext(Rp1 & imm); }}, uint64_t); } format CompressedROp { 0x3: decode CFUNCT1 { 0x0: decode CFUNCT2LOW { 0x0: c_sub({{ - Rp1 = Rp1 - Rp2; + Rp1 = rvSext(Rp1 - Rp2); }}); 0x1: c_xor({{ - Rp1 = Rp1 ^ Rp2; + Rp1 = rvSext(Rp1 ^ Rp2); }}); 0x2: c_or({{ - Rp1 = Rp1 | Rp2; + Rp1 = rvSext(Rp1 | Rp2); }}); 0x3: c_and({{ - Rp1 = Rp1 & Rp2; + Rp1 = rvSext(Rp1 & Rp2); }}); } - 0x1: decode CFUNCT2LOW { - 0x0: c_subw({{ - Rp1_sd = (int32_t)Rp1_sd - Rp2_sw; - }}); - 0x1: c_addw({{ - Rp1_sd = (int32_t)Rp1_sd + Rp2_sw; - }}); + 0x1: decode RVTYPE { + 0x1: decode CFUNCT2LOW { + 0x0: c_subw({{ + Rp1_sd = (int32_t)Rp1_sd - Rp2_sw; + }}); + 0x1: c_addw({{ + Rp1_sd = (int32_t)Rp1_sd + Rp2_sw; + }}); + } } } } } 0x5: CJOp::c_j({{ - NPC = PC + imm; + NPC = rvZext(PC + imm); }}, IsDirectControl, IsUncondControl); format CBOp { 0x6: c_beqz({{ - if (Rp1 == 0) - NPC = PC + imm; + if (rvSext(Rp1) == 0) + NPC = rvZext(PC + imm); else NPC = NPC; }}, IsDirectControl, IsCondControl); 0x7: c_bnez({{ - if (Rp1 != 0) - NPC = PC + imm; + if (rvSext(Rp1) != 0) + NPC = rvZext(PC + imm); else NPC = NPC; }}, IsDirectControl, IsCondControl); @@ -257,6 +321,10 @@ decode QUADRANT default Unknown::unknown() { 0x0: CIOp::c_slli({{ imm = CIMM5 | (CIMM1 << 5); }}, {{ + if (rvSelect((bool)CIMM1, false)) { + return std::make_shared( + "shmat[5] != 0", machInst); + } if (imm == 0) { return std::make_shared( "immediate = 0", machInst); @@ -265,7 +333,7 @@ decode QUADRANT default Unknown::unknown() { return std::make_shared( "source reg x0", machInst); } - Rc1 = Rc1 << imm; + Rc1 = 
rvSext(Rc1 << imm); }}, uint64_t); format CompressedLoad { 0x1: c_fldsp({{ @@ -275,7 +343,7 @@ decode QUADRANT default Unknown::unknown() { }}, {{ Fc1_bits = Mem; }}, {{ - EA = sp + offset; + EA = rvZext(sp + offset); }}); 0x2: c_lwsp({{ offset = CIMM5<4:2> << 2 | @@ -286,23 +354,36 @@ decode QUADRANT default Unknown::unknown() { return std::make_shared( "source reg x0", machInst); } - Rc1_sd = Mem_sw; + Rc1_sw = Mem_sw; }}, {{ - EA = sp + offset; - }}); - 0x3: c_ldsp({{ - offset = CIMM5<4:3> << 3 | - CIMM1 << 5 | - CIMM5<2:0> << 6; - }}, {{ - if (RC1 == 0) { - return std::make_shared( - "source reg x0", machInst); - } - Rc1_sd = Mem_sd; - }}, {{ - EA = sp + offset; + EA = rvZext(sp + offset); }}); + 0x3: decode RVTYPE { + 0x0: c_flwsp({{ + offset = CIMM5<4:2> << 2 | + CIMM1 << 5 | + CIMM5<1:0> << 6; + }}, {{ + freg_t fd; + fd = freg(f32(Mem_uw)); + Fd_bits = fd.v; + }}, {{ + EA = (uint32_t)(sp_uw + offset); + }}); + 0x1: c_ldsp({{ + offset = CIMM5<4:3> << 3 | + CIMM1 << 5 | + CIMM5<2:0> << 6; + }}, {{ + if (RC1 == 0) { + return std::make_shared( + "source reg x0", machInst); + } + Rc1_sd = Mem_sd; + }}, {{ + EA = sp + offset; + }}); + } } 0x4: decode CFUNCT1 { 0x0: decode RC2 { @@ -311,14 +392,14 @@ decode QUADRANT default Unknown::unknown() { return std::make_shared( "source reg x0", machInst); } - NPC = Rc1; + NPC = rvZext(Rc1); }}, IsIndirectControl, IsUncondControl); default: CROp::c_mv({{ if (RC1 == 0) { return std::make_shared( "source reg x0", machInst); } - Rc1 = Rc2; + Rc1 = rvSext(Rc2); }}); } 0x1: decode RC1 { @@ -335,11 +416,11 @@ decode QUADRANT default Unknown::unknown() { return std::make_shared( "source reg x0", machInst); } - ra = NPC; - NPC = Rc1; - }}, IsIndirectControl, IsUncondControl, IsCall); + ra = rvSext(NPC); + NPC = rvZext(Rc1); + }}, IsIndirectControl, IsUncondControl); default: CompressedROp::c_add({{ - Rc1_sd = Rc1_sd + Rc2_sd; + Rc1_sd = rvSext(Rc1_sd + Rc2_sd); }}); } } @@ -351,7 +432,7 @@ decode QUADRANT default Unknown::unknown() 
{ }}, {{ Mem_ud = Fc2_bits; }}, {{ - EA = sp + offset; + EA = rvZext(sp + offset); }}); 0x6: c_swsp({{ offset = CIMM6<5:2> << 2 | @@ -359,16 +440,26 @@ decode QUADRANT default Unknown::unknown() { }}, {{ Mem_uw = Rc2_uw; }}, {{ - EA = sp + offset; - }}); - 0x7: c_sdsp({{ - offset = CIMM6<5:3> << 3 | - CIMM6<2:0> << 6; - }}, {{ - Mem = Rc2; - }}, {{ - EA = sp + offset; + EA = rvZext(sp + offset); }}); + 0x7: decode RVTYPE { + 0x0: c_fswsp({{ + offset = CIMM6<5:2> << 2 | + CIMM6<1:0> << 6; + }}, {{ + Mem_uw = unboxF32(boxF32(Fs2_bits)); + }}, {{ + EA = (uint32_t)(sp_uw + offset); + }}); + 0x1: c_sdsp({{ + offset = CIMM6<5:3> << 3 | + CIMM6<2:0> << 6; + }}, {{ + Mem = Rc2; + }}, {{ + EA = sp + offset; + }}); + } } } 0x3: decode OPCODE { @@ -383,18 +474,22 @@ decode QUADRANT default Unknown::unknown() { 0x2: lw({{ Rd_sd = Mem_sw; }}); - 0x3: ld({{ - Rd_sd = Mem_sd; - }}); + 0x3: decode RVTYPE { + 0x1: ld({{ + Rd_sd = Mem_sd; + }}); + } 0x4: lbu({{ Rd = Mem_ub; }}); 0x5: lhu({{ Rd = Mem_uh; }}); - 0x6: lwu({{ - Rd = Mem_uw; - }}); + 0x6: decode RVTYPE { + 0x1: lwu({{ + Rd = Mem_uw; + }}); + } } } @@ -443,7 +538,11 @@ decode QUADRANT default Unknown::unknown() { 0x1: decode FS3 { format IOp { 0x00: slli({{ - Rd = Rs1 << imm; + if (rvSelect((bool)SHAMT6BIT5, false)) { + return std::make_shared( + "shmat[5] != 0", machInst); + } + Rd = rvSext(Rs1 << imm); }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); 0x02: decode FS2 { 0x0: sha256sum0({{ @@ -521,20 +620,24 @@ decode QUADRANT default Unknown::unknown() { format IOp { 0x0: addi({{ - Rd_sd = Rs1_sd + imm; + Rd_sd = rvSext(Rs1_sd + imm); }}); 0x2: slti({{ - Rd = (Rs1_sd < imm) ? 1 : 0; + Rd = (rvSext(Rs1_sd) < imm) ? 1 : 0; }}); 0x3: sltiu({{ - Rd = (Rs1 < imm) ? 1 : 0; - }}, uint64_t); + Rd = (rvZext(Rs1) < imm) ? 
1 : 0; + }}, uint64_t, imm_code = {{ imm = rvZext(sext<12>(IMM12)); }}); 0x4: xori({{ - Rd = Rs1 ^ imm; + Rd = rvSext(Rs1 ^ imm); }}, uint64_t); 0x5: decode FS3 { 0x0: srli({{ - Rd = Rs1 >> imm; + if (rvSelect((bool)SHAMT6BIT5, false)) { + return std::make_shared( + "shmat[5] != 0", machInst); + } + Rd = rvSext(rvZext(Rs1) >> imm); }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); 0x5: orc_b({{ uint64_t result = 0; @@ -549,7 +652,11 @@ decode QUADRANT default Unknown::unknown() { Rd = result; }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); 0x8: srai({{ - Rd_sd = Rs1_sd >> imm; + if (rvSelect((bool)SHAMT6BIT5, false)) { + return std::make_shared( + "shmat[5] != 0", machInst); + } + Rd_sd = rvSext(Rs1_sd) >> imm; }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); 0x9: bexti({{ uint64_t index = imm & (64 - 1); @@ -579,27 +686,31 @@ decode QUADRANT default Unknown::unknown() { } } 0x6: ori({{ - Rd = Rs1 | imm; + Rd = rvSext(Rs1 | imm); }}, uint64_t); 0x7: andi({{ - Rd = Rs1 & imm; + Rd = rvSext(Rs1 & imm); }}, uint64_t); } } 0x05: UOp::auipc({{ - Rd = PC + (sext<20>(imm) << 12); + Rd = rvSext(PC + (sext<20>(imm) << 12)); }}); 0x06: decode FUNCT3 { format IOp { - 0x0: addiw({{ - Rd_sw = (int32_t)(Rs1_sw + imm); - }}, int32_t); + 0x0: decode RVTYPE { + 0x1: addiw({{ + Rd_sw = (int32_t)(Rs1_sw + imm); + }}, int32_t); + } 0x1: decode FS3 { - 0x0: slliw({{ - Rd_sd = Rs1_sw << imm; - }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); + 0x0: decode RVTYPE { + 0x1: slliw({{ + Rd_sd = Rs1_sw << imm; + }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); + } 0x1: slli_uw({{ Rd = ((uint64_t)(Rs1_uw)) << imm; }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); @@ -616,12 +727,16 @@ decode QUADRANT default Unknown::unknown() { } } 0x5: decode FS3 { - 0x0: srliw({{ - Rd_sd = (int32_t)(Rs1_uw >> imm); - }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); - 0x8: sraiw({{ - Rd_sd = Rs1_sw >> imm; - }}, imm_type = uint64_t, imm_code = {{ 
imm = SHAMT5; }}); + 0x0: decode RVTYPE { + 0x1: srliw({{ + Rd_sd = (int32_t)(Rs1_uw >> imm); + }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); + } + 0x8: decode RVTYPE { + 0x1: sraiw({{ + Rd_sd = Rs1_sw >> imm; + }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); + } 0xc: roriw({{ Rd = (int32_t) ((Rs1_uw >> imm) | (Rs1_uw << ((32 - imm) & (32 - 1)))); }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); @@ -640,9 +755,11 @@ decode QUADRANT default Unknown::unknown() { 0x2: sw({{ Mem_uw = Rs2_uw; }}); - 0x3: sd({{ - Mem_ud = Rs2_ud; - }}); + 0x3: decode RVTYPE { + 0x1: sd({{ + Mem_ud = Rs2_ud; + }}); + } } } @@ -654,7 +771,7 @@ decode QUADRANT default Unknown::unknown() { return std::make_shared( "FPU is off", machInst); - Mem_uh = (uint16_t)Fs2_bits; + Mem_uh = unboxF16(boxF16(Fs2_bits)); }}, inst_flags=FloatMemWriteOp); 0x2: fsw({{ STATUS status = xc->readMiscReg(MISCREG_STATUS); @@ -662,7 +779,7 @@ decode QUADRANT default Unknown::unknown() { return std::make_shared( "FPU is off", machInst); - Mem_uw = (uint32_t)Fs2_bits; + Mem_uw = unboxF32(boxF32(Fs2_bits)); }}, inst_flags=FloatMemWriteOp); 0x3: fsd({{ STATUS status = xc->readMiscReg(MISCREG_STATUS); @@ -683,7 +800,7 @@ decode QUADRANT default Unknown::unknown() { 0x3: StoreCond::sc_w({{ Mem_uw = Rs2_uw; }}, {{ - Rd = result; + Rd = rvSext(result); }}, inst_flags=IsStoreConditional, mem_flags=LLSC); 0x0: AtomicMemOp::amoadd_w({{ Rd_sd = Mem_sw; @@ -749,78 +866,84 @@ decode QUADRANT default Unknown::unknown() { [](uint32_t* b, uint32_t a){ if (a > *b) *b = a; }); }}, mem_flags=ATOMIC_RETURN_OP); } - 0x3: decode AMOFUNCT { - 0x2: LoadReserved::lr_d({{ - Rd_sd = Mem_sd; - }}, mem_flags=LLSC); - 0x3: StoreCond::sc_d({{ - Mem = Rs2; - }}, {{ - Rd = result; - }}, mem_flags=LLSC, inst_flags=IsStoreConditional); - 0x0: AtomicMemOp::amoadd_d({{ - Rd_sd = Mem_sd; - }}, {{ - TypedAtomicOpFunctor *amo_op = + 0x3: decode RVTYPE { + 0x1: decode AMOFUNCT { + 0x2: LoadReserved::lr_d({{ + Rd_sd = Mem_sd; 
+ }}, mem_flags=LLSC); + 0x3: StoreCond::sc_d({{ + Mem = Rs2; + }}, {{ + Rd = result; + }}, mem_flags=LLSC, inst_flags=IsStoreConditional); + 0x0: AtomicMemOp::amoadd_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_sd, + [](int64_t* b, int64_t a){ *b += a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x1: AtomicMemOp::amoswap_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_ud, + [](uint64_t* b, uint64_t a){ *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x4: AtomicMemOp::amoxor_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_ud, + [](uint64_t* b, uint64_t a){ *b ^= a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x8: AtomicMemOp::amoor_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_ud, + [](uint64_t* b, uint64_t a){ *b |= a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0xc: AtomicMemOp::amoand_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_ud, + [](uint64_t* b, uint64_t a){ *b &= a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x10: AtomicMemOp::amomin_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = new AtomicGenericOp(Rs2_sd, - [](int64_t* b, int64_t a){ *b += a; }); - }}, mem_flags=ATOMIC_RETURN_OP); - 0x1: AtomicMemOp::amoswap_d({{ - Rd_sd = Mem_sd; - }}, {{ - TypedAtomicOpFunctor *amo_op = + [](int64_t* b, int64_t a){ if (a < *b) *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x14: AtomicMemOp::amomax_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = + new AtomicGenericOp(Rs2_sd, + [](int64_t* b, int64_t a){ if (a > *b) *b = a; }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x18: AtomicMemOp::amominu_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = new AtomicGenericOp(Rs2_ud, - [](uint64_t* b, uint64_t a){ *b = a; }); - }}, mem_flags=ATOMIC_RETURN_OP); - 0x4: AtomicMemOp::amoxor_d({{ - Rd_sd = Mem_sd; - }}, {{ - 
TypedAtomicOpFunctor *amo_op = + [](uint64_t* b, uint64_t a){ + if (a < *b) *b = a; + }); + }}, mem_flags=ATOMIC_RETURN_OP); + 0x1c: AtomicMemOp::amomaxu_d({{ + Rd_sd = Mem_sd; + }}, {{ + TypedAtomicOpFunctor *amo_op = new AtomicGenericOp(Rs2_ud, - [](uint64_t* b, uint64_t a){ *b ^= a; }); - }}, mem_flags=ATOMIC_RETURN_OP); - 0x8: AtomicMemOp::amoor_d({{ - Rd_sd = Mem_sd; - }}, {{ - TypedAtomicOpFunctor *amo_op = - new AtomicGenericOp(Rs2_ud, - [](uint64_t* b, uint64_t a){ *b |= a; }); - }}, mem_flags=ATOMIC_RETURN_OP); - 0xc: AtomicMemOp::amoand_d({{ - Rd_sd = Mem_sd; - }}, {{ - TypedAtomicOpFunctor *amo_op = - new AtomicGenericOp(Rs2_ud, - [](uint64_t* b, uint64_t a){ *b &= a; }); - }}, mem_flags=ATOMIC_RETURN_OP); - 0x10: AtomicMemOp::amomin_d({{ - Rd_sd = Mem_sd; - }}, {{ - TypedAtomicOpFunctor *amo_op = - new AtomicGenericOp(Rs2_sd, - [](int64_t* b, int64_t a){ if (a < *b) *b = a; }); - }}, mem_flags=ATOMIC_RETURN_OP); - 0x14: AtomicMemOp::amomax_d({{ - Rd_sd = Mem_sd; - }}, {{ - TypedAtomicOpFunctor *amo_op = - new AtomicGenericOp(Rs2_sd, - [](int64_t* b, int64_t a){ if (a > *b) *b = a; }); - }}, mem_flags=ATOMIC_RETURN_OP); - 0x18: AtomicMemOp::amominu_d({{ - Rd_sd = Mem_sd; - }}, {{ - TypedAtomicOpFunctor *amo_op = - new AtomicGenericOp(Rs2_ud, - [](uint64_t* b, uint64_t a){ if (a < *b) *b = a; }); - }}, mem_flags=ATOMIC_RETURN_OP); - 0x1c: AtomicMemOp::amomaxu_d({{ - Rd_sd = Mem_sd; - }}, {{ - TypedAtomicOpFunctor *amo_op = - new AtomicGenericOp(Rs2_ud, - [](uint64_t* b, uint64_t a){ if (a > *b) *b = a; }); - }}, mem_flags=ATOMIC_RETURN_OP); + [](uint64_t* b, uint64_t a){ + if (a > *b) *b = a; + }); + }}, mem_flags=ATOMIC_RETURN_OP); + } } } 0x0c: decode FUNCT3 { @@ -828,15 +951,15 @@ decode QUADRANT default Unknown::unknown() { 0x0: decode KFUNCT5 { 0x00: decode BS { 0x0: add({{ - Rd = Rs1_sd + Rs2_sd; + Rd = rvSext(Rs1_sd + Rs2_sd); }}); 0x1: sub({{ - Rd = Rs1_sd - Rs2_sd; + Rd = rvSext(Rs1_sd - Rs2_sd); }}); } 0x01: decode BS { 0x0: mul({{ - Rd = Rs1_sd 
* Rs2_sd; + Rd = rvSext(Rs1_sd * Rs2_sd); }}, IntMultOp); } 0x18: sm4ed({{ @@ -871,30 +994,36 @@ decode QUADRANT default Unknown::unknown() { } 0x1: decode FUNCT7 { 0x0: sll({{ - Rd = Rs1 << Rs2<5:0>; + Rd = rvSext(Rs1 << rvSelect(Rs2<4:0>, Rs2<5:0>)); }}); - 0x1: mulh({{ - bool negate = (Rs1_sd < 0) != (Rs2_sd < 0); + 0x1: decode RVTYPE { + 0x0: rv32_mulh({{ + Rd_sw = ((int64_t)Rs1_sw * Rs2_sw) >> 32; + }}, IntMultOp); + 0x1: mulh({{ + bool negate = (Rs1_sd < 0) != (Rs2_sd < 0); - uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd); - uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32; - uint64_t Rs2_lo = (uint32_t)std::abs(Rs2_sd); - uint64_t Rs2_hi = (uint64_t)std::abs(Rs2_sd) >> 32; + uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd); + uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32; + uint64_t Rs2_lo = (uint32_t)std::abs(Rs2_sd); + uint64_t Rs2_hi = (uint64_t)std::abs(Rs2_sd) >> 32; - uint64_t hi = Rs1_hi*Rs2_hi; - uint64_t mid1 = Rs1_hi*Rs2_lo; - uint64_t mid2 = Rs1_lo*Rs2_hi; - uint64_t lo = Rs2_lo*Rs1_lo; - uint64_t carry = ((uint64_t)(uint32_t)mid1 - + (uint64_t)(uint32_t)mid2 + (lo >> 32)) >> 32; + uint64_t hi = Rs1_hi*Rs2_hi; + uint64_t mid1 = Rs1_hi*Rs2_lo; + uint64_t mid2 = Rs1_lo*Rs2_hi; + uint64_t lo = Rs2_lo*Rs1_lo; + uint64_t carry = ((uint64_t)(uint32_t)mid1 + + (uint64_t)(uint32_t)mid2 + + (lo >> 32)) >> 32; - uint64_t res = hi + - (mid1 >> 32) + - (mid2 >> 32) + - carry; - Rd = negate ? ~res + (Rs1_sd*Rs2_sd == 0 ? 1 : 0) - : res; - }}, IntMultOp); + uint64_t res = hi + + (mid1 >> 32) + + (mid2 >> 32) + + carry; + Rd = negate ? ~res + (Rs1_sd*Rs2_sd == 0 ? 1 : 0) + : res; + }}, IntMultOp); + } 0x5: clmul({{ uint64_t result = 0; for (int i = 0; i < 64; i++) { @@ -923,28 +1052,34 @@ decode QUADRANT default Unknown::unknown() { } 0x2: decode FUNCT7 { 0x0: slt({{ - Rd = (Rs1_sd < Rs2_sd) ? 1 : 0; + Rd = (rvSext(Rs1_sd) < rvSext(Rs2_sd)) ? 
1 : 0; }}); - 0x1: mulhsu({{ - bool negate = Rs1_sd < 0; - uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd); - uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32; - uint64_t Rs2_lo = (uint32_t)Rs2; - uint64_t Rs2_hi = Rs2 >> 32; + 0x1: decode RVTYPE { + 0x0: rv32_mulhsu({{ + Rd_sw = ((int64_t)Rs1_sw * Rs2_uw) >> 32; + }}, IntMultOp); + 0x1: mulhsu({{ + bool negate = Rs1_sd < 0; + uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd); + uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32; + uint64_t Rs2_lo = (uint32_t)Rs2; + uint64_t Rs2_hi = Rs2 >> 32; - uint64_t hi = Rs1_hi*Rs2_hi; - uint64_t mid1 = Rs1_hi*Rs2_lo; - uint64_t mid2 = Rs1_lo*Rs2_hi; - uint64_t lo = Rs1_lo*Rs2_lo; - uint64_t carry = ((uint64_t)(uint32_t)mid1 - + (uint64_t)(uint32_t)mid2 + (lo >> 32)) >> 32; + uint64_t hi = Rs1_hi*Rs2_hi; + uint64_t mid1 = Rs1_hi*Rs2_lo; + uint64_t mid2 = Rs1_lo*Rs2_hi; + uint64_t lo = Rs1_lo*Rs2_lo; + uint64_t carry = ((uint64_t)(uint32_t)mid1 + + (uint64_t)(uint32_t)mid2 + + (lo >> 32)) >> 32; - uint64_t res = hi + - (mid1 >> 32) + - (mid2 >> 32) + - carry; - Rd = negate ? ~res + (Rs1_sd*Rs2 == 0 ? 1 : 0) : res; - }}, IntMultOp); + uint64_t res = hi + + (mid1 >> 32) + + (mid2 >> 32) + + carry; + Rd = negate ? ~res + (Rs1_sd*Rs2 == 0 ? 1 : 0) : res; + }}, IntMultOp); + } 0x5: clmulr({{ uint64_t result = 0; for (int i = 0; i < 64; i++) { @@ -963,23 +1098,29 @@ decode QUADRANT default Unknown::unknown() { } 0x3: decode FUNCT7 { 0x0: sltu({{ - Rd = (Rs1 < Rs2) ? 1 : 0; + Rd = (rvZext(Rs1) < rvZext(Rs2)) ? 
1 : 0; }}); - 0x1: mulhu({{ - uint64_t Rs1_lo = (uint32_t)Rs1; - uint64_t Rs1_hi = Rs1 >> 32; - uint64_t Rs2_lo = (uint32_t)Rs2; - uint64_t Rs2_hi = Rs2 >> 32; + 0x1: decode RVTYPE { + 0x0: rv32_mulhu({{ + Rd_sw = ((uint64_t)Rs1_uw * Rs2_uw) >> 32; + }}, IntMultOp); + 0x1: mulhu({{ + uint64_t Rs1_lo = (uint32_t)Rs1; + uint64_t Rs1_hi = Rs1 >> 32; + uint64_t Rs2_lo = (uint32_t)Rs2; + uint64_t Rs2_hi = Rs2 >> 32; - uint64_t hi = Rs1_hi*Rs2_hi; - uint64_t mid1 = Rs1_hi*Rs2_lo; - uint64_t mid2 = Rs1_lo*Rs2_hi; - uint64_t lo = Rs1_lo*Rs2_lo; - uint64_t carry = ((uint64_t)(uint32_t)mid1 - + (uint64_t)(uint32_t)mid2 + (lo >> 32)) >> 32; + uint64_t hi = Rs1_hi*Rs2_hi; + uint64_t mid1 = Rs1_hi*Rs2_lo; + uint64_t mid2 = Rs1_lo*Rs2_hi; + uint64_t lo = Rs1_lo*Rs2_lo; + uint64_t carry = ((uint64_t)(uint32_t)mid1 + + (uint64_t)(uint32_t)mid2 + + (lo >> 32)) >> 32; - Rd = hi + (mid1 >> 32) + (mid2 >> 32) + carry; - }}, IntMultOp); + Rd = hi + (mid1 >> 32) + (mid2 >> 32) + carry; + }}, IntMultOp); + } 0x5: clmulh({{ uint64_t result = 0; for (int i = 1; i < 64; i++) { @@ -992,19 +1133,32 @@ decode QUADRANT default Unknown::unknown() { } 0x4: decode FUNCT7 { 0x0: xor({{ - Rd = Rs1 ^ Rs2; + Rd = rvSext(Rs1 ^ Rs2); }}); - 0x1: div({{ - if (Rs2_sd == 0) { - Rd_sd = -1; - } else if ( - Rs1_sd == std::numeric_limits::min() - && Rs2_sd == -1) { - Rd_sd = std::numeric_limits::min(); - } else { - Rd_sd = Rs1_sd/Rs2_sd; - } - }}, IntDivOp); + 0x1: decode RVTYPE { + 0x0: rv32_div({{ + constexpr int32_t kRsMin = \ + std::numeric_limits::min(); + if (Rs2_sw == 0) { + Rd_sw = -1; + } else if (Rs1_sw == kRsMin && Rs2_sw == -1) { + Rd_sw = kRsMin; + } else { + Rd_sw = Rs1_sw/Rs2_sw; + } + }}, IntDivOp); + 0x1: div({{ + constexpr int64_t kRsMin = \ + std::numeric_limits::min(); + if (Rs2_sd == 0) { + Rd_sd = -1; + } else if (Rs1_sd == kRsMin && Rs2_sd == -1) { + Rd_sd = kRsMin; + } else { + Rd_sd = Rs1_sd/Rs2_sd; + } + }}, IntDivOp); + } 0x5: min({{ Rd = (((int64_t) Rs1) < ((int64_t) Rs2)) ? 
Rs1 : Rs2; }}); @@ -1020,17 +1174,27 @@ decode QUADRANT default Unknown::unknown() { } 0x5: decode FUNCT7 { 0x0: srl({{ - Rd = Rs1 >> Rs2<5:0>; + Rd = rvSext(rvZext(Rs1) >> + rvSelect(Rs2<4:0>, Rs2<5:0>)); }}); - 0x1: divu({{ - if (Rs2 == 0) { - Rd = std::numeric_limits::max(); - } else { - Rd = Rs1/Rs2; - } - }}, IntDivOp); + 0x1: decode RVTYPE { + 0x0: rv32_divu({{ + if (Rs2_uw == 0) { + Rd_sw = std::numeric_limits::max(); + } else { + Rd_sw = Rs1_uw/Rs2_uw; + } + }}, IntDivOp); + 0x1: divu({{ + if (Rs2 == 0) { + Rd = std::numeric_limits::max(); + } else { + Rd = Rs1/Rs2; + } + }}, IntDivOp); + } 0x20: sra({{ - Rd_sd = Rs1_sd >> Rs2<5:0>; + Rd = rvSext(Rs1_sd) >> rvSelect(Rs2<4:0>, Rs2<5:0>); }}); 0x5: minu({{ Rd = Rs1 < Rs2 ? Rs1 : Rs2; @@ -1046,19 +1210,32 @@ decode QUADRANT default Unknown::unknown() { } 0x6: decode FUNCT7 { 0x0: or({{ - Rd = Rs1 | Rs2; + Rd = rvSext(Rs1 | Rs2); }}); - 0x1: rem({{ - if (Rs2_sd == 0) { - Rd = Rs1_sd; - } else if ( - Rs1_sd == std::numeric_limits::min() - && Rs2_sd == -1) { - Rd = 0; - } else { - Rd = Rs1_sd%Rs2_sd; - } - }}, IntDivOp); + 0x1: decode RVTYPE { + 0x0: rv32_rem({{ + constexpr int32_t kRsMin = \ + std::numeric_limits::min(); + if (Rs2_sw == 0) { + Rd_sw = Rs1_sw; + } else if (Rs1_sw == kRsMin && Rs2_sw == -1) { + Rd_sw = 0; + } else { + Rd_sw = Rs1_sw%Rs2_sw; + } + }}, IntDivOp); + 0x1: rem({{ + constexpr int64_t kRsMin = \ + std::numeric_limits::min(); + if (Rs2_sd == 0) { + Rd = Rs1_sd; + } else if (Rs1_sd == kRsMin && Rs2_sd == -1) { + Rd = 0; + } else { + Rd = Rs1_sd%Rs2_sd; + } + }}, IntDivOp); + } 0x5: max({{ Rd = (((int64_t) Rs1) > ((int64_t) Rs2)) ? 
Rs1 : Rs2; }}); @@ -1071,15 +1248,24 @@ decode QUADRANT default Unknown::unknown() { } 0x7: decode FUNCT7 { 0x0: and({{ - Rd = Rs1 & Rs2; + Rd = rvSext(Rs1 & Rs2); }}); - 0x1: remu({{ - if (Rs2 == 0) { - Rd = Rs1; - } else { - Rd = Rs1%Rs2; - } - }}, IntDivOp); + 0x1: decode RVTYPE { + 0x0: rv32_remu({{ + if (Rs2_uw == 0) { + Rd_sw = Rs1_uw; + } else { + Rd_sw = Rs1_uw%Rs2_uw; + } + }}, IntDivOp); + 0x1: remu({{ + if (Rs2 == 0) { + Rd = Rs1; + } else { + Rd = Rs1%Rs2; + } + }}, IntDivOp); + } 0x5: maxu({{ Rd = Rs1 > Rs2 ? Rs1 : Rs2; }}); @@ -1091,29 +1277,37 @@ decode QUADRANT default Unknown::unknown() { } 0x0d: UOp::lui({{ - Rd = (uint64_t)(sext<20>(imm) << 12); + Rd = (sext<20>(imm) << 12); }}); 0x0e: decode FUNCT3 { format ROp { 0x0: decode FUNCT7 { - 0x0: addw({{ - Rd_sd = Rs1_sw + Rs2_sw; - }}); - 0x1: mulw({{ - Rd_sd = (int32_t)(Rs1_sw*Rs2_sw); - }}, IntMultOp); + 0x0: decode RVTYPE { + 0x1: addw({{ + Rd_sd = Rs1_sw + Rs2_sw; + }}); + } + 0x1: decode RVTYPE { + 0x1: mulw({{ + Rd_sd = (int32_t)(Rs1_sw*Rs2_sw); + }}, IntMultOp); + } 0x4: add_uw({{ Rd = Rs1_uw + Rs2; }}); - 0x20: subw({{ - Rd_sd = Rs1_sw - Rs2_sw; - }}); + 0x20: decode RVTYPE { + 0x1: subw({{ + Rd_sd = Rs1_sw - Rs2_sw; + }}); + } } 0x1: decode FUNCT7 { - 0x0: sllw({{ - Rd_sd = Rs1_sw << Rs2<4:0>; - }}); + 0x0: decode RVTYPE { + 0x1: sllw({{ + Rd_sd = Rs1_sw << Rs2<4:0>; + }}); + } 0x30: rolw({{ int shamt = Rs2 & (32 - 1); Rd = (int32_t) ((Rs1_uw << shamt) | (Rs1_uw >> ((32 - shamt) & (32 - 1)))); @@ -1125,16 +1319,19 @@ decode QUADRANT default Unknown::unknown() { }}); } 0x4: decode FUNCT7 { - 0x1: divw({{ - if (Rs2_sw == 0) { - Rd_sd = -1; - } else if (Rs1_sw == std::numeric_limits::min() - && Rs2_sw == -1) { - Rd_sd = std::numeric_limits::min(); - } else { - Rd_sd = Rs1_sw/Rs2_sw; - } - }}, IntDivOp); + 0x1: decode RVTYPE { + 0x1: divw({{ + constexpr int32_t kRsMin = \ + std::numeric_limits::min(); + if (Rs2_sw == 0) { + Rd_sd = -1; + } else if (Rs1_sw == kRsMin && Rs2_sw == -1) { + Rd_sd = 
kRsMin; + } else { + Rd_sd = Rs1_sw/Rs2_sw; + } + }}, IntDivOp); + } 0x4: zext_h({{ Rd = Rs1_uh; }}); @@ -1143,46 +1340,57 @@ decode QUADRANT default Unknown::unknown() { }}); } 0x5: decode FUNCT7 { - 0x0: srlw({{ - Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>); - }}); - 0x1: divuw({{ - if (Rs2_uw == 0) { - Rd_sd = std::numeric_limits::max(); - } else { - Rd_sd = (int32_t)(Rs1_uw/Rs2_uw); - } - }}, IntDivOp); - 0x20: sraw({{ - Rd_sd = Rs1_sw >> Rs2<4:0>; - }}); + 0x0: decode RVTYPE { + 0x1: srlw({{ + Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>); + }}); + } + 0x1: decode RVTYPE { + 0x1: divuw({{ + if (Rs2_uw == 0) { + Rd_sd = std::numeric_limits::max(); + } else { + Rd_sd = (int32_t)(Rs1_uw/Rs2_uw); + } + }}, IntDivOp); + } + 0x20: decode RVTYPE { + 0x1: sraw({{ + Rd_sd = Rs1_sw >> Rs2<4:0>; + }}); + } 0x30: rorw({{ int shamt = Rs2 & (32 - 1); Rd = (int32_t) ((Rs1_uw >> shamt) | (Rs1_uw << ((32 - shamt) & (32 - 1)))); }}); } 0x6: decode FUNCT7 { - 0x1: remw({{ - if (Rs2_sw == 0) { - Rd_sd = Rs1_sw; - } else if (Rs1_sw == std::numeric_limits::min() - && Rs2_sw == -1) { - Rd_sd = 0; - } else { - Rd_sd = Rs1_sw%Rs2_sw; - } - }}, IntDivOp); + 0x1: decode RVTYPE { + 0x1: remw({{ + constexpr int32_t kRsMin = \ + std::numeric_limits::min(); + if (Rs2_sw == 0) { + Rd_sd = Rs1_sw; + } else if (Rs1_sw == kRsMin && Rs2_sw == -1) { + Rd_sd = 0; + } else { + Rd_sd = Rs1_sw%Rs2_sw; + } + }}, IntDivOp); + } 0x10: sh3add_uw({{ Rd = (((uint64_t)Rs1_uw) << 3) + Rs2; }}); } - 0x7: remuw({{ - if (Rs2_uw == 0) { - Rd_sd = (int32_t)Rs1_uw; - } else { - Rd_sd = (int32_t)(Rs1_uw%Rs2_uw); - } - }}, IntDivOp); + 0x7: decode RVTYPE { + 0x1: remuw({{ + if (Rs2_uw == 0) { + Rd_sd = (int32_t)Rs1_uw; + } else { + Rd_sd = (int32_t)(Rs1_uw%Rs2_uw); + } + }}, IntDivOp); + } } } @@ -1643,14 +1851,18 @@ decode QUADRANT default Unknown::unknown() { Rd = sext<32>(f32_to_ui32(f32(freg(Fs1_bits)), rm, true)); }}, FloatCvtOp); - 0x2: fcvt_l_s({{ - RM_REQUIRED; - Rd_sd = f32_to_i64(f32(freg(Fs1_bits)), rm, true); - }}, 
FloatCvtOp); - 0x3: fcvt_lu_s({{ - RM_REQUIRED; - Rd = f32_to_ui64(f32(freg(Fs1_bits)), rm, true); - }}, FloatCvtOp); + 0x2: decode RVTYPE { + 0x1: fcvt_l_s({{ + RM_REQUIRED; + Rd_sd = f32_to_i64(f32(freg(Fs1_bits)), rm, true); + }}, FloatCvtOp); + } + 0x3: decode RVTYPE { + 0x1: fcvt_lu_s({{ + RM_REQUIRED; + Rd = f32_to_ui64(f32(freg(Fs1_bits)), rm, true); + }}, FloatCvtOp); + } } 0x61: decode CONV_SGN { 0x0: fcvt_w_d({{ @@ -1663,14 +1875,18 @@ decode QUADRANT default Unknown::unknown() { Rd = sext<32>(f64_to_ui32(f64(freg(Fs1_bits)), rm, true)); }}, FloatCvtOp); - 0x2: fcvt_l_d({{ - RM_REQUIRED; - Rd_sd = f64_to_i64(f64(freg(Fs1_bits)), rm, true); - }}, FloatCvtOp); - 0x3: fcvt_lu_d({{ - RM_REQUIRED; - Rd = f64_to_ui64(f64(freg(Fs1_bits)), rm, true); - }}, FloatCvtOp); + 0x2: decode RVTYPE { + 0x1: fcvt_l_d({{ + RM_REQUIRED; + Rd_sd = f64_to_i64(f64(freg(Fs1_bits)), rm, true); + }}, FloatCvtOp); + } + 0x3: decode RVTYPE { + 0x1: fcvt_lu_d({{ + RM_REQUIRED; + Rd = f64_to_ui64(f64(freg(Fs1_bits)), rm, true); + }}, FloatCvtOp); + } } 0x62: decode CONV_SGN { 0x0: fcvt_w_h({{ @@ -1683,40 +1899,48 @@ decode QUADRANT default Unknown::unknown() { Rd = sext<32>(f16_to_ui32(f16(freg(Fs1_bits)), rm, true)); }}, FloatCvtOp); - 0x2: fcvt_l_h({{ - RM_REQUIRED; - Rd_sd = f16_to_i64(f16(freg(Fs1_bits)), rm, true); - }}, FloatCvtOp); - 0x3: fcvt_lu_h({{ - RM_REQUIRED; - Rd = f16_to_ui64(f16(freg(Fs1_bits)), rm, true); - }}, FloatCvtOp); + 0x2: decode RVTYPE { + 0x1: fcvt_l_h({{ + RM_REQUIRED; + Rd_sd = f16_to_i64(f16(freg(Fs1_bits)), rm, true); + }}, FloatCvtOp); + } + 0x3: decode RVTYPE { + 0x1: fcvt_lu_h({{ + RM_REQUIRED; + Rd = f16_to_ui64(f16(freg(Fs1_bits)), rm, true); + }}, FloatCvtOp); + } } 0x68: decode CONV_SGN { 0x0: fcvt_s_w({{ RM_REQUIRED; freg_t fd; - fd = freg(i32_to_f32((int32_t)Rs1_sw)); + fd = freg(i32_to_f32(Rs1_sw)); Fd_bits = fd.v; }}, FloatCvtOp); 0x1: fcvt_s_wu({{ RM_REQUIRED; freg_t fd; - fd = freg(ui32_to_f32((uint32_t)Rs1_uw)); + fd = 
freg(ui32_to_f32(Rs1_uw)); Fd_bits = fd.v; }}, FloatCvtOp); - 0x2: fcvt_s_l({{ - RM_REQUIRED; - freg_t fd; - fd = freg(i64_to_f32(Rs1_ud)); - Fd_bits = fd.v; + 0x2: decode RVTYPE { + 0x1: fcvt_s_l({{ + RM_REQUIRED; + freg_t fd; + fd = freg(i64_to_f32(Rs1_ud)); + Fd_bits = fd.v; }}, FloatCvtOp); - 0x3: fcvt_s_lu({{ - RM_REQUIRED; - freg_t fd; - fd = freg(ui64_to_f32(Rs1)); - Fd_bits = fd.v; + } + 0x3: decode RVTYPE { + 0x1: fcvt_s_lu({{ + RM_REQUIRED; + freg_t fd; + fd = freg(ui64_to_f32(Rs1)); + Fd_bits = fd.v; }}, FloatCvtOp); + } } 0x69: decode CONV_SGN { 0x0: fcvt_d_w({{ @@ -1727,14 +1951,18 @@ decode QUADRANT default Unknown::unknown() { RM_REQUIRED; Fd = (double)Rs1_uw; }}, FloatCvtOp); - 0x2: fcvt_d_l({{ - RM_REQUIRED; - Fd = (double)Rs1_sd; - }}, FloatCvtOp); - 0x3: fcvt_d_lu({{ - RM_REQUIRED; - Fd = (double)Rs1; - }}, FloatCvtOp); + 0x2: decode RVTYPE { + 0x1: fcvt_d_l({{ + RM_REQUIRED; + Fd = (double)Rs1_sd; + }}, FloatCvtOp); + } + 0x3: decode RVTYPE { + 0x1: fcvt_d_lu({{ + RM_REQUIRED; + Fd = (double)Rs1; + }}, FloatCvtOp); + } } 0x6a: decode CONV_SGN { 0x0: fcvt_h_w({{ @@ -1749,18 +1977,22 @@ decode QUADRANT default Unknown::unknown() { fd = freg(ui32_to_f16((uint32_t)Rs1_uw)); Fd_bits = fd.v; }}, FloatCvtOp); - 0x2: fcvt_h_l({{ - RM_REQUIRED; - freg_t fd; - fd = freg(i64_to_f16(Rs1_ud)); - Fd_bits = fd.v; - }}, FloatCvtOp); - 0x3: fcvt_h_lu({{ - RM_REQUIRED; - freg_t fd; - fd = freg(ui64_to_f16(Rs1)); - Fd_bits = fd.v; - }}, FloatCvtOp); + 0x2: decode RVTYPE { + 0x1: fcvt_h_l({{ + RM_REQUIRED; + freg_t fd; + fd = freg(i64_to_f16(Rs1_ud)); + Fd_bits = fd.v; + }}, FloatCvtOp); + } + 0x3: decode RVTYPE { + 0x1: fcvt_h_lu({{ + RM_REQUIRED; + freg_t fd; + fd = freg(ui64_to_f16(Rs1)); + Fd_bits = fd.v; + }}, FloatCvtOp); + } } 0x70: decode ROUND_MODE { 0x0: fmv_x_w({{ @@ -1771,13 +2003,15 @@ decode QUADRANT default Unknown::unknown() { Rd = result; }}, FloatCvtOp); 0x1: fclass_s({{ - Rd = f32_classify(f32(freg(Fs1_bits))); + Rd = 
rvSext(f32_classify(f32(freg(Fs1_bits)))); }}, FloatMiscOp); } 0x71: decode ROUND_MODE { - 0x0: fmv_x_d({{ - Rd = freg(Fs1_bits).v; - }}, FloatCvtOp); + 0x0: decode RVTYPE { + 0x1: fmv_x_d({{ + Rd = freg(Fs1_bits).v; + }}, FloatCvtOp); + } 0x1: fclass_d({{ Rd = f64_classify(f64(freg(Fs1_bits))); }}, FloatMiscOp); @@ -1799,11 +2033,13 @@ decode QUADRANT default Unknown::unknown() { fd = freg(f32(Rs1_uw)); Fd_bits = fd.v; }}, FloatCvtOp); - 0x79: fmv_d_x({{ - freg_t fd; - fd = freg(f64(Rs1)); - Fd_bits = fd.v; - }}, FloatCvtOp); + 0x79: decode RVTYPE { + 0x1: fmv_d_x({{ + freg_t fd; + fd = freg(f64(Rs1)); + Fd_bits = fd.v; + }}, FloatCvtOp); + } 0x7a: fmv_h_x({{ freg_t fd; fd = freg(f16(Rs1_uh)); @@ -1815,45 +2051,45 @@ decode QUADRANT default Unknown::unknown() { 0x18: decode FUNCT3 { format BOp { 0x0: beq({{ - if (Rs1 == Rs2) { - NPC = PC + imm; + if (rvSext(Rs1) == rvSext(Rs2)) { + NPC = rvZext(PC + imm); } else { - NPC = NPC; + NPC = rvZext(NPC); } }}, IsDirectControl, IsCondControl); 0x1: bne({{ - if (Rs1 != Rs2) { - NPC = PC + imm; + if (rvSext(Rs1) != rvSext(Rs2)) { + NPC = rvZext(PC + imm); } else { - NPC = NPC; + NPC = rvZext(NPC); } }}, IsDirectControl, IsCondControl); 0x4: blt({{ - if (Rs1_sd < Rs2_sd) { - NPC = PC + imm; + if (rvSext(Rs1_sd) < rvSext(Rs2_sd)) { + NPC = rvZext(PC + imm); } else { - NPC = NPC; + NPC = rvZext(NPC); } }}, IsDirectControl, IsCondControl); 0x5: bge({{ - if (Rs1_sd >= Rs2_sd) { - NPC = PC + imm; + if (rvSext(Rs1_sd) >= rvSext(Rs2_sd)) { + NPC = rvZext(PC + imm); } else { - NPC = NPC; + NPC = rvZext(NPC); } }}, IsDirectControl, IsCondControl); 0x6: bltu({{ - if (Rs1 < Rs2) { - NPC = PC + imm; + if (rvZext(Rs1) < rvZext(Rs2)) { + NPC = rvZext(PC + imm); } else { - NPC = NPC; + NPC = rvZext(NPC); } }}, IsDirectControl, IsCondControl); 0x7: bgeu({{ - if (Rs1 >= Rs2) { - NPC = PC + imm; + if (rvZext(Rs1) >= rvZext(Rs2)) { + NPC = rvZext(PC + imm); } else { - NPC = NPC; + NPC = rvZext(NPC); } }}, IsDirectControl, IsCondControl); } @@ 
-1861,14 +2097,14 @@ decode QUADRANT default Unknown::unknown() { 0x19: decode FUNCT3 { 0x0: Jump::jalr({{ - Rd = NPC; - NPC = (imm + Rs1) & (~0x1); + Rd = rvSext(NPC); + NPC = rvZext((imm + Rs1) & (~0x1)); }}, IsIndirectControl, IsUncondControl); } 0x1b: JOp::jal({{ - Rd = NPC; - NPC = PC + imm; + Rd = rvSext(NPC); + NPC = rvZext(PC + imm); }}, IsDirectControl, IsUncondControl); 0x1c: decode FUNCT3 { diff --git a/src/arch/riscv/isa/formats/amo.isa b/src/arch/riscv/isa/formats/amo.isa index 6b22e8f439..fe497536cc 100644 --- a/src/arch/riscv/isa/formats/amo.isa +++ b/src/arch/riscv/isa/formats/amo.isa @@ -446,8 +446,8 @@ def template AtomicMemOpRMWCompleteAcc {{ // LR/SC/AMO decode formats -def format LoadReserved(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}}, - mem_flags=[], inst_flags=[]) {{ +def format LoadReserved(memacc_code, postacc_code={{ }}, + ea_code={{EA = rvZext(Rs1);}}, mem_flags=[], inst_flags=[]) {{ macro_ea_code = '' macro_inst_flags = [] macro_iop = InstObjParams(name, Name, 'LoadReserved', macro_ea_code, @@ -473,8 +473,8 @@ def format LoadReserved(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}}, + LoadReservedCompleteAcc.subst(iop) }}; -def format StoreCond(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}}, - mem_flags=[], inst_flags=[]) {{ +def format StoreCond(memacc_code, postacc_code={{ }}, + ea_code={{EA = rvZext(Rs1);}}, mem_flags=[], inst_flags=[]) {{ macro_ea_code = '' macro_inst_flags = [] macro_iop = InstObjParams(name, Name, 'StoreCond', macro_ea_code, @@ -501,7 +501,7 @@ def format StoreCond(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}}, }}; def format AtomicMemOp(memacc_code, amoop_code, postacc_code={{ }}, - ea_code={{EA = Rs1;}}, mem_flags=[], inst_flags=[]) {{ + ea_code={{EA = rvZext(Rs1);}}, mem_flags=[], inst_flags=[]) {{ macro_ea_code = '' macro_inst_flags = [] macro_iop = InstObjParams(name, Name, 'AtomicMemOp', macro_ea_code, diff --git a/src/arch/riscv/isa/formats/mem.isa 
b/src/arch/riscv/isa/formats/mem.isa index fa334585a7..0d80260a25 100644 --- a/src/arch/riscv/isa/formats/mem.isa +++ b/src/arch/riscv/isa/formats/mem.isa @@ -228,7 +228,7 @@ def template StoreCompleteAcc {{ } }}; -def format Load(memacc_code, ea_code = {{EA = Rs1 + offset;}}, +def format Load(memacc_code, ea_code = {{EA = rvZext(Rs1 + offset);}}, offset_code={{offset = sext<12>(IMM12);}}, mem_flags=[], inst_flags=[]) {{ (header_output, decoder_output, decode_block, exec_output) = \ @@ -236,7 +236,7 @@ def format Load(memacc_code, ea_code = {{EA = Rs1 + offset;}}, inst_flags, 'Load', exec_template_base='Load') }}; -def format Store(memacc_code, ea_code={{EA = Rs1 + offset;}}, +def format Store(memacc_code, ea_code={{EA = rvZext(Rs1 + offset);}}, offset_code={{offset = sext<12>(IMM5 | (IMM7 << 5));}}, mem_flags=[], inst_flags=[]) {{ (header_output, decoder_output, decode_block, exec_output) = \ From dd04e7044503046d19b89361edd9a48e14e66ab8 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Mon, 28 Nov 2022 08:27:48 +0000 Subject: [PATCH 053/492] arch-riscv: Implement rv32 zicsr extension 1. Add misc register mstatush, cycleh, timeh, instreth, hpmcounter03...hpmcounter31, pmpcfg1, pmpcfg3 2. Implement handling RV32 only registers 3. 
Implement methods of set time CSR Change-Id: I5c55c18a0da91977d6e23da24ea3cbcba9f0509b Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65733 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/arch/riscv/isa.cc | 110 ++++- src/arch/riscv/isa/decoder.isa | 24 +- src/arch/riscv/isa/formats/standard.isa | 9 + src/arch/riscv/regs/misc.hh | 586 ++++++++++++++++-------- src/dev/riscv/clint.cc | 7 +- 5 files changed, 518 insertions(+), 218 deletions(-) diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index c8eabd44ad..3b4f378afa 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -148,9 +148,9 @@ namespace RiscvISA [MISCREG_MCAUSE] = "MCAUSE", [MISCREG_MTVAL] = "MTVAL", [MISCREG_PMPCFG0] = "PMPCFG0", - // pmpcfg1 rv32 only + [MISCREG_PMPCFG1] = "PMPCFG1", // pmpcfg1 is rv32 only [MISCREG_PMPCFG2] = "PMPCFG2", - // pmpcfg3 rv32 only + [MISCREG_PMPCFG3] = "PMPCFG3", // pmpcfg3 is rv32 only [MISCREG_PMPADDR00] = "PMPADDR00", [MISCREG_PMPADDR01] = "PMPADDR01", [MISCREG_PMPADDR02] = "PMPADDR02", @@ -189,6 +189,42 @@ namespace RiscvISA [MISCREG_NMIVEC] = "NMIVEC", [MISCREG_NMIE] = "NMIE", [MISCREG_NMIP] = "NMIP", + + // following are rv32 only registers + [MISCREG_MSTATUSH] = "MSTATUSH", + + [MISCREG_CYCLEH] = "CYCLEH", + [MISCREG_TIMEH] = "TIMEH", + [MISCREG_INSTRETH] = "INSTRETH", + [MISCREG_HPMCOUNTER03H] = "HPMCOUNTER03H", + [MISCREG_HPMCOUNTER04H] = "HPMCOUNTER04H", + [MISCREG_HPMCOUNTER05H] = "HPMCOUNTER05H", + [MISCREG_HPMCOUNTER06H] = "HPMCOUNTER06H", + [MISCREG_HPMCOUNTER07H] = "HPMCOUNTER07H", + [MISCREG_HPMCOUNTER08H] = "HPMCOUNTER08H", + [MISCREG_HPMCOUNTER09H] = "HPMCOUNTER09H", + [MISCREG_HPMCOUNTER10H] = "HPMCOUNTER10H", + [MISCREG_HPMCOUNTER11H] = "HPMCOUNTER11H", + [MISCREG_HPMCOUNTER12H] = "HPMCOUNTER12H", + [MISCREG_HPMCOUNTER13H] = "HPMCOUNTER13H", + [MISCREG_HPMCOUNTER14H] = "HPMCOUNTER14H", + [MISCREG_HPMCOUNTER15H] = "HPMCOUNTER15H", + [MISCREG_HPMCOUNTER16H] = "HPMCOUNTER16H", + 
[MISCREG_HPMCOUNTER17H] = "HPMCOUNTER17H", + [MISCREG_HPMCOUNTER18H] = "HPMCOUNTER18H", + [MISCREG_HPMCOUNTER19H] = "HPMCOUNTER19H", + [MISCREG_HPMCOUNTER20H] = "HPMCOUNTER20H", + [MISCREG_HPMCOUNTER21H] = "HPMCOUNTER21H", + [MISCREG_HPMCOUNTER22H] = "HPMCOUNTER22H", + [MISCREG_HPMCOUNTER23H] = "HPMCOUNTER23H", + [MISCREG_HPMCOUNTER24H] = "HPMCOUNTER24H", + [MISCREG_HPMCOUNTER25H] = "HPMCOUNTER25H", + [MISCREG_HPMCOUNTER26H] = "HPMCOUNTER26H", + [MISCREG_HPMCOUNTER27H] = "HPMCOUNTER27H", + [MISCREG_HPMCOUNTER28H] = "HPMCOUNTER28H", + [MISCREG_HPMCOUNTER29H] = "HPMCOUNTER29H", + [MISCREG_HPMCOUNTER30H] = "HPMCOUNTER30H", + [MISCREG_HPMCOUNTER31H] = "HPMCOUNTER31H", }}; namespace @@ -273,7 +309,13 @@ void ISA::clear() bool ISA::hpmCounterEnabled(int misc_reg) const { - int hpmcounter = misc_reg - MISCREG_CYCLE; + int hpmcounter = 0; + if (misc_reg >= MISCREG_CYCLEH) { + hpmcounter = misc_reg - MISCREG_CYCLEH; + } else { + hpmcounter = misc_reg - MISCREG_CYCLE; + } + if (hpmcounter < 0 || hpmcounter > 31) panic("Illegal HPM counter %d\n", hpmcounter); int counteren; @@ -313,7 +355,16 @@ ISA::readMiscReg(RegIndex idx) if (hpmCounterEnabled(MISCREG_CYCLE)) { DPRINTF(RiscvMisc, "Cycle counter at: %llu.\n", tc->getCpuPtr()->curCycle()); - return tc->getCpuPtr()->curCycle(); + return static_cast(tc->getCpuPtr()->curCycle()); + } else { + warn("Cycle counter disabled.\n"); + return 0; + } + case MISCREG_CYCLEH: + if (hpmCounterEnabled(MISCREG_CYCLEH)) { + DPRINTF(RiscvMisc, "Cycle counter at: %llu.\n", + tc->getCpuPtr()->curCycle()); + return bits(tc->getCpuPtr()->curCycle(), 63, 32); } else { warn("Cycle counter disabled.\n"); return 0; @@ -327,11 +378,29 @@ ISA::readMiscReg(RegIndex idx) warn("Wall clock disabled.\n"); return 0; } + case MISCREG_TIMEH: + if (hpmCounterEnabled(MISCREG_TIMEH)) { + DPRINTF(RiscvMisc, "Wall-clock counter at: %llu.\n", + std::time(nullptr)); + return readMiscRegNoEffect(MISCREG_TIMEH); + } else { + warn("Wall clock disabled.\n"); + return 0; + 
} case MISCREG_INSTRET: if (hpmCounterEnabled(MISCREG_INSTRET)) { DPRINTF(RiscvMisc, "Instruction counter at: %llu.\n", tc->getCpuPtr()->totalInsts()); - return tc->getCpuPtr()->totalInsts(); + return static_cast(tc->getCpuPtr()->totalInsts()); + } else { + warn("Instruction counter disabled.\n"); + return 0; + } + case MISCREG_INSTRETH: + if (hpmCounterEnabled(MISCREG_INSTRETH)) { + DPRINTF(RiscvMisc, "Instruction counter at: %llu.\n", + tc->getCpuPtr()->totalInsts()); + return bits(tc->getCpuPtr()->totalInsts(), 63, 32); } else { warn("Instruction counter disabled.\n"); return 0; @@ -406,6 +475,16 @@ ISA::readMiscReg(RegIndex idx) warn("HPM counter %d disabled.\n", idx - MISCREG_CYCLE); return 0; } + } else if (idx >= MISCREG_HPMCOUNTER03H && + idx <= MISCREG_HPMCOUNTER31H) { + if (hpmCounterEnabled(idx)) { + DPRINTF(RiscvMisc, "HPM counter %d: %llu.\n", + idx - MISCREG_CYCLE, tc->getCpuPtr()->curCycle()); + return bits(tc->getCpuPtr()->curCycle(), 63, 32); + } else { + warn("HPM counter %d disabled.\n", idx - MISCREG_CYCLE); + return 0; + } } return readMiscRegNoEffect(idx); } @@ -437,18 +516,32 @@ ISA::setMiscReg(RegIndex idx, RegVal val) // for 8 PMP entries. case MISCREG_PMPCFG0: + case MISCREG_PMPCFG1: case MISCREG_PMPCFG2: + case MISCREG_PMPCFG3: { // PMP registers should only be modified in M mode assert(readMiscRegNoEffect(MISCREG_PRV) == PRV_M); + int regSize = 0; + switch (rv_type) { + case RV32: + regSize = 4; + break; + case RV64: + regSize = 8; + break; + default: + panic("%s: Unknown rv_type: %d", name(), (int)rv_type); + } + // Specs do not seem to mention what should be // configured first, cfg or address regs! 
// qemu seems to update the tables when // pmp addr regs are written (with the assumption // that cfg regs are already written) - for (int i=0; i < sizeof(val); i++) { + for (int i=0; i < regSize; i++) { uint8_t cfg_val = (val >> (8*i)) & 0xff; auto mmu = dynamic_cast @@ -456,10 +549,7 @@ ISA::setMiscReg(RegIndex idx, RegVal val) // Form pmp_index using the index i and // PMPCFG register number - // Note: MISCREG_PMPCFG2 - MISCREG_PMPCFG0 = 1 - // 8*(idx-MISCREG_PMPCFG0) will be useful - // if a system contains more than 16 PMP entries - uint32_t pmp_index = i+(8*(idx-MISCREG_PMPCFG0)); + uint32_t pmp_index = i+(4*(idx-MISCREG_PMPCFG0)); mmu->getPMP()->pmpUpdateCfg(pmp_index,cfg_val); } diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index d442002bd5..885794032a 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -2201,28 +2201,28 @@ decode QUADRANT default Unknown::unknown() { } format CSROp { 0x1: csrrw({{ - Rd = data; - data = Rs1; + Rd = rvSext(data); + data = rvZext(Rs1); }}, IsSerializeAfter, IsNonSpeculative, No_OpClass); 0x2: csrrs({{ - Rd = data; - data |= Rs1; + Rd = rvSext(data); + data = rvZext(data | Rs1); }}, IsSerializeAfter, IsNonSpeculative, No_OpClass); 0x3: csrrc({{ - Rd = data; - data &= ~Rs1; + Rd = rvSext(data); + data = rvZext(data & ~Rs1); }}, IsSerializeAfter, IsNonSpeculative, No_OpClass); 0x5: csrrwi({{ - Rd = data; - data = uimm; + Rd = rvSext(data); + data = rvZext(uimm); }}, IsSerializeAfter, IsNonSpeculative, No_OpClass); 0x6: csrrsi({{ - Rd = data; - data |= uimm; + Rd = rvSext(data); + data = rvZext(data | uimm); }}, IsSerializeAfter, IsNonSpeculative, No_OpClass); 0x7: csrrci({{ - Rd = data; - data &= ~uimm; + Rd = rvSext(data); + data = rvZext(data & ~uimm); }}, IsSerializeAfter, IsNonSpeculative, No_OpClass); } } diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa index 5390164f10..6be281fa56 100644 --- 
a/src/arch/riscv/isa/formats/standard.isa +++ b/src/arch/riscv/isa/formats/standard.isa @@ -338,6 +338,14 @@ def template CSRExecute {{ RegIndex midx = csr_data_it->second.physIndex; const std::string& csrName = csr_data_it->second.name; + if ((csr_data_it->second.rvTypes & (1 << machInst.rv_type)) == 0) { + return std::make_shared( + csprintf("%s is not support in mode %d\n", + csrName, + machInst.rv_type), + machInst); + } + auto mask_it = csr_masks.find(csr); RegVal maskVal = (mask_it == csr_masks.end()) ? mask(64) : mask_it->second; @@ -373,6 +381,7 @@ def template CSRExecute {{ } else { olddata = xc->readMiscReg(midx); } + olddata = rvZext(olddata); auto olddata_all = olddata; olddata &= maskVal; diff --git a/src/arch/riscv/regs/misc.hh b/src/arch/riscv/regs/misc.hh index 7f6fff4e00..8cb4ca0f91 100644 --- a/src/arch/riscv/regs/misc.hh +++ b/src/arch/riscv/regs/misc.hh @@ -153,9 +153,9 @@ enum MiscRegIndex MISCREG_MCAUSE, MISCREG_MTVAL, MISCREG_PMPCFG0, - // pmpcfg1 rv32 only + MISCREG_PMPCFG1, // pmpcfg1 is rv32 only MISCREG_PMPCFG2, - // pmpcfg3 rv32 only + MISCREG_PMPCFG3, // pmpcfg3 is rv32 only MISCREG_PMPADDR00, MISCREG_PMPADDR01, MISCREG_PMPADDR02, @@ -201,6 +201,42 @@ enum MiscRegIndex // non-maskable-interrupt-pending: NMI version of xIP MISCREG_NMIP, + // the following MicsRegIndex are RV32 only + MISCREG_MSTATUSH, + + MISCREG_CYCLEH, + MISCREG_TIMEH, + MISCREG_INSTRETH, + MISCREG_HPMCOUNTER03H, + MISCREG_HPMCOUNTER04H, + MISCREG_HPMCOUNTER05H, + MISCREG_HPMCOUNTER06H, + MISCREG_HPMCOUNTER07H, + MISCREG_HPMCOUNTER08H, + MISCREG_HPMCOUNTER09H, + MISCREG_HPMCOUNTER10H, + MISCREG_HPMCOUNTER11H, + MISCREG_HPMCOUNTER12H, + MISCREG_HPMCOUNTER13H, + MISCREG_HPMCOUNTER14H, + MISCREG_HPMCOUNTER15H, + MISCREG_HPMCOUNTER16H, + MISCREG_HPMCOUNTER17H, + MISCREG_HPMCOUNTER18H, + MISCREG_HPMCOUNTER19H, + MISCREG_HPMCOUNTER20H, + MISCREG_HPMCOUNTER21H, + MISCREG_HPMCOUNTER22H, + MISCREG_HPMCOUNTER23H, + MISCREG_HPMCOUNTER24H, + MISCREG_HPMCOUNTER25H, + 
MISCREG_HPMCOUNTER26H, + MISCREG_HPMCOUNTER27H, + MISCREG_HPMCOUNTER28H, + MISCREG_HPMCOUNTER29H, + MISCREG_HPMCOUNTER30H, + MISCREG_HPMCOUNTER31H, + NUM_MISCREGS }; @@ -252,7 +288,41 @@ enum CSRIndex CSR_HPMCOUNTER29 = 0xC1D, CSR_HPMCOUNTER30 = 0xC1E, CSR_HPMCOUNTER31 = 0xC1F, - // HPMCOUNTERH rv32 only + + // rv32 only csr register begin + CSR_CYCLEH = 0xC80, + CSR_TIMEH = 0xC81, + CSR_INSTRETH = 0xC82, + CSR_HPMCOUNTER03H = 0xC83, + CSR_HPMCOUNTER04H = 0xC84, + CSR_HPMCOUNTER05H = 0xC85, + CSR_HPMCOUNTER06H = 0xC86, + CSR_HPMCOUNTER07H = 0xC87, + CSR_HPMCOUNTER08H = 0xC88, + CSR_HPMCOUNTER09H = 0xC89, + CSR_HPMCOUNTER10H = 0xC8A, + CSR_HPMCOUNTER11H = 0xC8B, + CSR_HPMCOUNTER12H = 0xC8C, + CSR_HPMCOUNTER13H = 0xC8D, + CSR_HPMCOUNTER14H = 0xC8E, + CSR_HPMCOUNTER15H = 0xC8F, + CSR_HPMCOUNTER16H = 0xC90, + CSR_HPMCOUNTER17H = 0xC91, + CSR_HPMCOUNTER18H = 0xC92, + CSR_HPMCOUNTER19H = 0xC93, + CSR_HPMCOUNTER20H = 0xC94, + CSR_HPMCOUNTER21H = 0xC95, + CSR_HPMCOUNTER22H = 0xC96, + CSR_HPMCOUNTER23H = 0xC97, + CSR_HPMCOUNTER24H = 0xC98, + CSR_HPMCOUNTER25H = 0xC99, + CSR_HPMCOUNTER26H = 0xC9A, + CSR_HPMCOUNTER27H = 0xC9B, + CSR_HPMCOUNTER28H = 0xC9C, + CSR_HPMCOUNTER29H = 0xC9D, + CSR_HPMCOUNTER30H = 0xC9E, + CSR_HPMCOUNTER31H = 0xC9F, + // rv32 only csr register end CSR_SSTATUS = 0x100, CSR_SEDELEG = 0x102, @@ -278,15 +348,16 @@ enum CSRIndex CSR_MIE = 0x304, CSR_MTVEC = 0x305, CSR_MCOUNTEREN = 0x306, + CSR_MSTATUSH = 0x310, // rv32 only CSR_MSCRATCH = 0x340, CSR_MEPC = 0x341, CSR_MCAUSE = 0x342, CSR_MTVAL = 0x343, CSR_MIP = 0x344, CSR_PMPCFG0 = 0x3A0, - // pmpcfg1 rv32 only + CSR_PMPCFG1 = 0x3A1, // pmpcfg1 rv32 only CSR_PMPCFG2 = 0x3A2, - // pmpcfg3 rv32 only + CSR_PMPCFG3 = 0x3A3,// pmpcfg3 rv32 only CSR_PMPADDR00 = 0x3B0, CSR_PMPADDR01 = 0x3B1, CSR_PMPADDR02 = 0x3B2, @@ -305,36 +376,70 @@ enum CSRIndex CSR_PMPADDR15 = 0x3BF, CSR_MCYCLE = 0xB00, CSR_MINSTRET = 0xB02, - CSR_MHPMCOUNTER03 = 0xC03, - CSR_MHPMCOUNTER04 = 0xC04, - CSR_MHPMCOUNTER05 = 0xC05, - 
CSR_MHPMCOUNTER06 = 0xC06, - CSR_MHPMCOUNTER07 = 0xC07, - CSR_MHPMCOUNTER08 = 0xC08, - CSR_MHPMCOUNTER09 = 0xC09, - CSR_MHPMCOUNTER10 = 0xC0A, - CSR_MHPMCOUNTER11 = 0xC0B, - CSR_MHPMCOUNTER12 = 0xC0C, - CSR_MHPMCOUNTER13 = 0xC0D, - CSR_MHPMCOUNTER14 = 0xC0E, - CSR_MHPMCOUNTER15 = 0xC0F, - CSR_MHPMCOUNTER16 = 0xC10, - CSR_MHPMCOUNTER17 = 0xC11, - CSR_MHPMCOUNTER18 = 0xC12, - CSR_MHPMCOUNTER19 = 0xC13, - CSR_MHPMCOUNTER20 = 0xC14, - CSR_MHPMCOUNTER21 = 0xC15, - CSR_MHPMCOUNTER22 = 0xC16, - CSR_MHPMCOUNTER23 = 0xC17, - CSR_MHPMCOUNTER24 = 0xC18, - CSR_MHPMCOUNTER25 = 0xC19, - CSR_MHPMCOUNTER26 = 0xC1A, - CSR_MHPMCOUNTER27 = 0xC1B, - CSR_MHPMCOUNTER28 = 0xC1C, - CSR_MHPMCOUNTER29 = 0xC1D, - CSR_MHPMCOUNTER30 = 0xC1E, - CSR_MHPMCOUNTER31 = 0xC1F, - // MHPMCOUNTERH rv32 only + CSR_MHPMCOUNTER03 = 0xB03, + CSR_MHPMCOUNTER04 = 0xB04, + CSR_MHPMCOUNTER05 = 0xB05, + CSR_MHPMCOUNTER06 = 0xB06, + CSR_MHPMCOUNTER07 = 0xB07, + CSR_MHPMCOUNTER08 = 0xB08, + CSR_MHPMCOUNTER09 = 0xB09, + CSR_MHPMCOUNTER10 = 0xB0A, + CSR_MHPMCOUNTER11 = 0xB0B, + CSR_MHPMCOUNTER12 = 0xB0C, + CSR_MHPMCOUNTER13 = 0xB0D, + CSR_MHPMCOUNTER14 = 0xB0E, + CSR_MHPMCOUNTER15 = 0xB0F, + CSR_MHPMCOUNTER16 = 0xB10, + CSR_MHPMCOUNTER17 = 0xB11, + CSR_MHPMCOUNTER18 = 0xB12, + CSR_MHPMCOUNTER19 = 0xB13, + CSR_MHPMCOUNTER20 = 0xB14, + CSR_MHPMCOUNTER21 = 0xB15, + CSR_MHPMCOUNTER22 = 0xB16, + CSR_MHPMCOUNTER23 = 0xB17, + CSR_MHPMCOUNTER24 = 0xB18, + CSR_MHPMCOUNTER25 = 0xB19, + CSR_MHPMCOUNTER26 = 0xB1A, + CSR_MHPMCOUNTER27 = 0xB1B, + CSR_MHPMCOUNTER28 = 0xB1C, + CSR_MHPMCOUNTER29 = 0xB1D, + CSR_MHPMCOUNTER30 = 0xB1E, + CSR_MHPMCOUNTER31 = 0xB1F, + + // rv32 only csr register begin + CSR_MCYCLEH = 0xB80, + CSR_MINSTRETH = 0xB82, + CSR_MHPMCOUNTER03H = 0xB83, + CSR_MHPMCOUNTER04H = 0xB84, + CSR_MHPMCOUNTER05H = 0xB85, + CSR_MHPMCOUNTER06H = 0xB86, + CSR_MHPMCOUNTER07H = 0xB87, + CSR_MHPMCOUNTER08H = 0xB88, + CSR_MHPMCOUNTER09H = 0xB89, + CSR_MHPMCOUNTER10H = 0xB8A, + CSR_MHPMCOUNTER11H = 0xB8B, + CSR_MHPMCOUNTER12H = 
0xB8C, + CSR_MHPMCOUNTER13H = 0xB8D, + CSR_MHPMCOUNTER14H = 0xB8E, + CSR_MHPMCOUNTER15H = 0xB8F, + CSR_MHPMCOUNTER16H = 0xB90, + CSR_MHPMCOUNTER17H = 0xB91, + CSR_MHPMCOUNTER18H = 0xB92, + CSR_MHPMCOUNTER19H = 0xB93, + CSR_MHPMCOUNTER20H = 0xB94, + CSR_MHPMCOUNTER21H = 0xB95, + CSR_MHPMCOUNTER22H = 0xB96, + CSR_MHPMCOUNTER23H = 0xB97, + CSR_MHPMCOUNTER24H = 0xB98, + CSR_MHPMCOUNTER25H = 0xB99, + CSR_MHPMCOUNTER26H = 0xB9A, + CSR_MHPMCOUNTER27H = 0xB9B, + CSR_MHPMCOUNTER28H = 0xB9C, + CSR_MHPMCOUNTER29H = 0xB9D, + CSR_MHPMCOUNTER30H = 0xB9E, + CSR_MHPMCOUNTER31H = 0xB9F, + // rv32 only csr register end + CSR_MHPMEVENT03 = 0x323, CSR_MHPMEVENT04 = 0x324, CSR_MHPMEVENT05 = 0x325, @@ -378,170 +483,242 @@ struct CSRMetadata { const std::string name; const int physIndex; + const uint64_t rvTypes; }; +template +constexpr uint64_t rvTypeFlags(T... args) { + return ((1 << args) | ...); +} + const std::unordered_map CSRData = { - {CSR_USTATUS, {"ustatus", MISCREG_STATUS}}, - {CSR_UIE, {"uie", MISCREG_IE}}, - {CSR_UTVEC, {"utvec", MISCREG_UTVEC}}, - {CSR_USCRATCH, {"uscratch", MISCREG_USCRATCH}}, - {CSR_UEPC, {"uepc", MISCREG_UEPC}}, - {CSR_UCAUSE, {"ucause", MISCREG_UCAUSE}}, - {CSR_UTVAL, {"utval", MISCREG_UTVAL}}, - {CSR_UIP, {"uip", MISCREG_IP}}, - {CSR_FFLAGS, {"fflags", MISCREG_FFLAGS}}, - {CSR_FRM, {"frm", MISCREG_FRM}}, - {CSR_FCSR, {"fcsr", MISCREG_FFLAGS}}, // Actually FRM << 5 | FFLAGS - {CSR_CYCLE, {"cycle", MISCREG_CYCLE}}, - {CSR_TIME, {"time", MISCREG_TIME}}, - {CSR_INSTRET, {"instret", MISCREG_INSTRET}}, - {CSR_HPMCOUNTER03, {"hpmcounter03", MISCREG_HPMCOUNTER03}}, - {CSR_HPMCOUNTER04, {"hpmcounter04", MISCREG_HPMCOUNTER04}}, - {CSR_HPMCOUNTER05, {"hpmcounter05", MISCREG_HPMCOUNTER05}}, - {CSR_HPMCOUNTER06, {"hpmcounter06", MISCREG_HPMCOUNTER06}}, - {CSR_HPMCOUNTER07, {"hpmcounter07", MISCREG_HPMCOUNTER07}}, - {CSR_HPMCOUNTER08, {"hpmcounter08", MISCREG_HPMCOUNTER08}}, - {CSR_HPMCOUNTER09, {"hpmcounter09", MISCREG_HPMCOUNTER09}}, - {CSR_HPMCOUNTER10, 
{"hpmcounter10", MISCREG_HPMCOUNTER10}}, - {CSR_HPMCOUNTER11, {"hpmcounter11", MISCREG_HPMCOUNTER11}}, - {CSR_HPMCOUNTER12, {"hpmcounter12", MISCREG_HPMCOUNTER12}}, - {CSR_HPMCOUNTER13, {"hpmcounter13", MISCREG_HPMCOUNTER13}}, - {CSR_HPMCOUNTER14, {"hpmcounter14", MISCREG_HPMCOUNTER14}}, - {CSR_HPMCOUNTER15, {"hpmcounter15", MISCREG_HPMCOUNTER15}}, - {CSR_HPMCOUNTER16, {"hpmcounter16", MISCREG_HPMCOUNTER16}}, - {CSR_HPMCOUNTER17, {"hpmcounter17", MISCREG_HPMCOUNTER17}}, - {CSR_HPMCOUNTER18, {"hpmcounter18", MISCREG_HPMCOUNTER18}}, - {CSR_HPMCOUNTER19, {"hpmcounter19", MISCREG_HPMCOUNTER19}}, - {CSR_HPMCOUNTER20, {"hpmcounter20", MISCREG_HPMCOUNTER20}}, - {CSR_HPMCOUNTER21, {"hpmcounter21", MISCREG_HPMCOUNTER21}}, - {CSR_HPMCOUNTER22, {"hpmcounter22", MISCREG_HPMCOUNTER22}}, - {CSR_HPMCOUNTER23, {"hpmcounter23", MISCREG_HPMCOUNTER23}}, - {CSR_HPMCOUNTER24, {"hpmcounter24", MISCREG_HPMCOUNTER24}}, - {CSR_HPMCOUNTER25, {"hpmcounter25", MISCREG_HPMCOUNTER25}}, - {CSR_HPMCOUNTER26, {"hpmcounter26", MISCREG_HPMCOUNTER26}}, - {CSR_HPMCOUNTER27, {"hpmcounter27", MISCREG_HPMCOUNTER27}}, - {CSR_HPMCOUNTER28, {"hpmcounter28", MISCREG_HPMCOUNTER28}}, - {CSR_HPMCOUNTER29, {"hpmcounter29", MISCREG_HPMCOUNTER29}}, - {CSR_HPMCOUNTER30, {"hpmcounter30", MISCREG_HPMCOUNTER30}}, - {CSR_HPMCOUNTER31, {"hpmcounter31", MISCREG_HPMCOUNTER31}}, + {CSR_USTATUS, {"ustatus", MISCREG_STATUS, rvTypeFlags(RV64, RV32)}}, + {CSR_UIE, {"uie", MISCREG_IE, rvTypeFlags(RV64, RV32)}}, + {CSR_UTVEC, {"utvec", MISCREG_UTVEC, rvTypeFlags(RV64, RV32)}}, + {CSR_USCRATCH, {"uscratch", MISCREG_USCRATCH, rvTypeFlags(RV64, RV32)}}, + {CSR_UEPC, {"uepc", MISCREG_UEPC, rvTypeFlags(RV64, RV32)}}, + {CSR_UCAUSE, {"ucause", MISCREG_UCAUSE, rvTypeFlags(RV64, RV32)}}, + {CSR_UTVAL, {"utval", MISCREG_UTVAL, rvTypeFlags(RV64, RV32)}}, + {CSR_UIP, {"uip", MISCREG_IP, rvTypeFlags(RV64, RV32)}}, + {CSR_FFLAGS, {"fflags", MISCREG_FFLAGS, rvTypeFlags(RV64, RV32)}}, + {CSR_FRM, {"frm", MISCREG_FRM, rvTypeFlags(RV64, RV32)}}, 
+ {CSR_FCSR, {"fcsr", MISCREG_FFLAGS, rvTypeFlags(RV64, RV32)}}, // Actually FRM << 5 | FFLAGS + {CSR_CYCLE, {"cycle", MISCREG_CYCLE, rvTypeFlags(RV64, RV32)}}, + {CSR_TIME, {"time", MISCREG_TIME, rvTypeFlags(RV64, RV32)}}, + {CSR_INSTRET, {"instret", MISCREG_INSTRET, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER03, {"hpmcounter03", MISCREG_HPMCOUNTER03, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER04, {"hpmcounter04", MISCREG_HPMCOUNTER04, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER05, {"hpmcounter05", MISCREG_HPMCOUNTER05, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER06, {"hpmcounter06", MISCREG_HPMCOUNTER06, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER07, {"hpmcounter07", MISCREG_HPMCOUNTER07, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER08, {"hpmcounter08", MISCREG_HPMCOUNTER08, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER09, {"hpmcounter09", MISCREG_HPMCOUNTER09, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER10, {"hpmcounter10", MISCREG_HPMCOUNTER10, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER11, {"hpmcounter11", MISCREG_HPMCOUNTER11, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER12, {"hpmcounter12", MISCREG_HPMCOUNTER12, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER13, {"hpmcounter13", MISCREG_HPMCOUNTER13, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER14, {"hpmcounter14", MISCREG_HPMCOUNTER14, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER15, {"hpmcounter15", MISCREG_HPMCOUNTER15, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER16, {"hpmcounter16", MISCREG_HPMCOUNTER16, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER17, {"hpmcounter17", MISCREG_HPMCOUNTER17, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER18, {"hpmcounter18", MISCREG_HPMCOUNTER18, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER19, {"hpmcounter19", MISCREG_HPMCOUNTER19, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER20, {"hpmcounter20", MISCREG_HPMCOUNTER20, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER21, {"hpmcounter21", MISCREG_HPMCOUNTER21, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER22, {"hpmcounter22", 
MISCREG_HPMCOUNTER22, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER23, {"hpmcounter23", MISCREG_HPMCOUNTER23, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER24, {"hpmcounter24", MISCREG_HPMCOUNTER24, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER25, {"hpmcounter25", MISCREG_HPMCOUNTER25, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER26, {"hpmcounter26", MISCREG_HPMCOUNTER26, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER27, {"hpmcounter27", MISCREG_HPMCOUNTER27, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER28, {"hpmcounter28", MISCREG_HPMCOUNTER28, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER29, {"hpmcounter29", MISCREG_HPMCOUNTER29, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER30, {"hpmcounter30", MISCREG_HPMCOUNTER30, rvTypeFlags(RV64, RV32)}}, + {CSR_HPMCOUNTER31, {"hpmcounter31", MISCREG_HPMCOUNTER31, rvTypeFlags(RV64, RV32)}}, + {CSR_CYCLEH, {"cycleh", MISCREG_CYCLEH, rvTypeFlags(RV32)}}, + {CSR_TIMEH, {"timeh", MISCREG_TIMEH, rvTypeFlags(RV32)}}, + {CSR_INSTRETH, {"instreth", MISCREG_INSTRETH, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER03H, {"hpmcounter03h", MISCREG_HPMCOUNTER03H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER04H, {"hpmcounter04h", MISCREG_HPMCOUNTER04H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER05H, {"hpmcounter05h", MISCREG_HPMCOUNTER05H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER06H, {"hpmcounter06h", MISCREG_HPMCOUNTER06H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER07H, {"hpmcounter07h", MISCREG_HPMCOUNTER07H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER08H, {"hpmcounter08h", MISCREG_HPMCOUNTER08H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER09H, {"hpmcounter09h", MISCREG_HPMCOUNTER09H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER10H, {"hpmcounter10h", MISCREG_HPMCOUNTER10H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER11H, {"hpmcounter11h", MISCREG_HPMCOUNTER11H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER12H, {"hpmcounter12h", MISCREG_HPMCOUNTER12H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER13H, {"hpmcounter13h", MISCREG_HPMCOUNTER13H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER14H, {"hpmcounter14h", 
MISCREG_HPMCOUNTER14H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER15H, {"hpmcounter15h", MISCREG_HPMCOUNTER15H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER16H, {"hpmcounter16h", MISCREG_HPMCOUNTER16H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER17H, {"hpmcounter17h", MISCREG_HPMCOUNTER17H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER18H, {"hpmcounter18h", MISCREG_HPMCOUNTER18H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER19H, {"hpmcounter19h", MISCREG_HPMCOUNTER19H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER20H, {"hpmcounter20h", MISCREG_HPMCOUNTER20H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER21H, {"hpmcounter21h", MISCREG_HPMCOUNTER21H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER22H, {"hpmcounter22h", MISCREG_HPMCOUNTER22H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER23H, {"hpmcounter23h", MISCREG_HPMCOUNTER23H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER24H, {"hpmcounter24h", MISCREG_HPMCOUNTER24H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER25H, {"hpmcounter25h", MISCREG_HPMCOUNTER25H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER26H, {"hpmcounter26h", MISCREG_HPMCOUNTER26H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER27H, {"hpmcounter27h", MISCREG_HPMCOUNTER27H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER28H, {"hpmcounter28h", MISCREG_HPMCOUNTER28H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER29H, {"hpmcounter29h", MISCREG_HPMCOUNTER29H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER30H, {"hpmcounter30h", MISCREG_HPMCOUNTER30H, rvTypeFlags(RV32)}}, + {CSR_HPMCOUNTER31H, {"hpmcounter31h", MISCREG_HPMCOUNTER31H, rvTypeFlags(RV32)}}, - {CSR_SSTATUS, {"sstatus", MISCREG_STATUS}}, - {CSR_SEDELEG, {"sedeleg", MISCREG_SEDELEG}}, - {CSR_SIDELEG, {"sideleg", MISCREG_SIDELEG}}, - {CSR_SIE, {"sie", MISCREG_IE}}, - {CSR_STVEC, {"stvec", MISCREG_STVEC}}, - {CSR_SCOUNTEREN, {"scounteren", MISCREG_SCOUNTEREN}}, - {CSR_SSCRATCH, {"sscratch", MISCREG_SSCRATCH}}, - {CSR_SEPC, {"sepc", MISCREG_SEPC}}, - {CSR_SCAUSE, {"scause", MISCREG_SCAUSE}}, - {CSR_STVAL, {"stval", MISCREG_STVAL}}, - {CSR_SIP, {"sip", MISCREG_IP}}, - {CSR_SATP, {"satp", MISCREG_SATP}}, + 
{CSR_SSTATUS, {"sstatus", MISCREG_STATUS, rvTypeFlags(RV64, RV32)}}, + {CSR_SEDELEG, {"sedeleg", MISCREG_SEDELEG, rvTypeFlags(RV64, RV32)}}, + {CSR_SIDELEG, {"sideleg", MISCREG_SIDELEG, rvTypeFlags(RV64, RV32)}}, + {CSR_SIE, {"sie", MISCREG_IE, rvTypeFlags(RV64, RV32)}}, + {CSR_STVEC, {"stvec", MISCREG_STVEC, rvTypeFlags(RV64, RV32)}}, + {CSR_SCOUNTEREN, {"scounteren", MISCREG_SCOUNTEREN, rvTypeFlags(RV64, RV32)}}, + {CSR_SSCRATCH, {"sscratch", MISCREG_SSCRATCH, rvTypeFlags(RV64, RV32)}}, + {CSR_SEPC, {"sepc", MISCREG_SEPC, rvTypeFlags(RV64, RV32)}}, + {CSR_SCAUSE, {"scause", MISCREG_SCAUSE, rvTypeFlags(RV64, RV32)}}, + {CSR_STVAL, {"stval", MISCREG_STVAL, rvTypeFlags(RV64, RV32)}}, + {CSR_SIP, {"sip", MISCREG_IP, rvTypeFlags(RV64, RV32)}}, + {CSR_SATP, {"satp", MISCREG_SATP, rvTypeFlags(RV64, RV32)}}, - {CSR_MVENDORID, {"mvendorid", MISCREG_VENDORID}}, - {CSR_MARCHID, {"marchid", MISCREG_ARCHID}}, - {CSR_MIMPID, {"mimpid", MISCREG_IMPID}}, - {CSR_MHARTID, {"mhartid", MISCREG_HARTID}}, - {CSR_MSTATUS, {"mstatus", MISCREG_STATUS}}, - {CSR_MISA, {"misa", MISCREG_ISA}}, - {CSR_MEDELEG, {"medeleg", MISCREG_MEDELEG}}, - {CSR_MIDELEG, {"mideleg", MISCREG_MIDELEG}}, - {CSR_MIE, {"mie", MISCREG_IE}}, - {CSR_MTVEC, {"mtvec", MISCREG_MTVEC}}, - {CSR_MCOUNTEREN, {"mcounteren", MISCREG_MCOUNTEREN}}, - {CSR_MSCRATCH, {"mscratch", MISCREG_MSCRATCH}}, - {CSR_MEPC, {"mepc", MISCREG_MEPC}}, - {CSR_MCAUSE, {"mcause", MISCREG_MCAUSE}}, - {CSR_MTVAL, {"mtval", MISCREG_MTVAL}}, - {CSR_MIP, {"mip", MISCREG_IP}}, - {CSR_PMPCFG0, {"pmpcfg0", MISCREG_PMPCFG0}}, - // pmpcfg1 rv32 only - {CSR_PMPCFG2, {"pmpcfg2", MISCREG_PMPCFG2}}, - // pmpcfg3 rv32 only - {CSR_PMPADDR00, {"pmpaddr0", MISCREG_PMPADDR00}}, - {CSR_PMPADDR01, {"pmpaddr1", MISCREG_PMPADDR01}}, - {CSR_PMPADDR02, {"pmpaddr2", MISCREG_PMPADDR02}}, - {CSR_PMPADDR03, {"pmpaddr3", MISCREG_PMPADDR03}}, - {CSR_PMPADDR04, {"pmpaddr4", MISCREG_PMPADDR04}}, - {CSR_PMPADDR05, {"pmpaddr5", MISCREG_PMPADDR05}}, - {CSR_PMPADDR06, {"pmpaddr6", 
MISCREG_PMPADDR06}}, - {CSR_PMPADDR07, {"pmpaddr7", MISCREG_PMPADDR07}}, - {CSR_PMPADDR08, {"pmpaddr8", MISCREG_PMPADDR08}}, - {CSR_PMPADDR09, {"pmpaddr9", MISCREG_PMPADDR09}}, - {CSR_PMPADDR10, {"pmpaddr10", MISCREG_PMPADDR10}}, - {CSR_PMPADDR11, {"pmpaddr11", MISCREG_PMPADDR11}}, - {CSR_PMPADDR12, {"pmpaddr12", MISCREG_PMPADDR12}}, - {CSR_PMPADDR13, {"pmpaddr13", MISCREG_PMPADDR13}}, - {CSR_PMPADDR14, {"pmpaddr14", MISCREG_PMPADDR14}}, - {CSR_PMPADDR15, {"pmpaddr15", MISCREG_PMPADDR15}}, - {CSR_MCYCLE, {"mcycle", MISCREG_CYCLE}}, - {CSR_MINSTRET, {"minstret", MISCREG_INSTRET}}, - {CSR_MHPMCOUNTER03, {"mhpmcounter03", MISCREG_HPMCOUNTER03}}, - {CSR_MHPMCOUNTER04, {"mhpmcounter04", MISCREG_HPMCOUNTER04}}, - {CSR_MHPMCOUNTER05, {"mhpmcounter05", MISCREG_HPMCOUNTER05}}, - {CSR_MHPMCOUNTER06, {"mhpmcounter06", MISCREG_HPMCOUNTER06}}, - {CSR_MHPMCOUNTER07, {"mhpmcounter07", MISCREG_HPMCOUNTER07}}, - {CSR_MHPMCOUNTER08, {"mhpmcounter08", MISCREG_HPMCOUNTER08}}, - {CSR_MHPMCOUNTER09, {"mhpmcounter09", MISCREG_HPMCOUNTER09}}, - {CSR_MHPMCOUNTER10, {"mhpmcounter10", MISCREG_HPMCOUNTER10}}, - {CSR_MHPMCOUNTER11, {"mhpmcounter11", MISCREG_HPMCOUNTER11}}, - {CSR_MHPMCOUNTER12, {"mhpmcounter12", MISCREG_HPMCOUNTER12}}, - {CSR_MHPMCOUNTER13, {"mhpmcounter13", MISCREG_HPMCOUNTER13}}, - {CSR_MHPMCOUNTER14, {"mhpmcounter14", MISCREG_HPMCOUNTER14}}, - {CSR_MHPMCOUNTER15, {"mhpmcounter15", MISCREG_HPMCOUNTER15}}, - {CSR_MHPMCOUNTER16, {"mhpmcounter16", MISCREG_HPMCOUNTER16}}, - {CSR_MHPMCOUNTER17, {"mhpmcounter17", MISCREG_HPMCOUNTER17}}, - {CSR_MHPMCOUNTER18, {"mhpmcounter18", MISCREG_HPMCOUNTER18}}, - {CSR_MHPMCOUNTER19, {"mhpmcounter19", MISCREG_HPMCOUNTER19}}, - {CSR_MHPMCOUNTER20, {"mhpmcounter20", MISCREG_HPMCOUNTER20}}, - {CSR_MHPMCOUNTER21, {"mhpmcounter21", MISCREG_HPMCOUNTER21}}, - {CSR_MHPMCOUNTER22, {"mhpmcounter22", MISCREG_HPMCOUNTER22}}, - {CSR_MHPMCOUNTER23, {"mhpmcounter23", MISCREG_HPMCOUNTER23}}, - {CSR_MHPMCOUNTER24, {"mhpmcounter24", MISCREG_HPMCOUNTER24}}, - 
{CSR_MHPMCOUNTER25, {"mhpmcounter25", MISCREG_HPMCOUNTER25}}, - {CSR_MHPMCOUNTER26, {"mhpmcounter26", MISCREG_HPMCOUNTER26}}, - {CSR_MHPMCOUNTER27, {"mhpmcounter27", MISCREG_HPMCOUNTER27}}, - {CSR_MHPMCOUNTER28, {"mhpmcounter28", MISCREG_HPMCOUNTER28}}, - {CSR_MHPMCOUNTER29, {"mhpmcounter29", MISCREG_HPMCOUNTER29}}, - {CSR_MHPMCOUNTER30, {"mhpmcounter30", MISCREG_HPMCOUNTER30}}, - {CSR_MHPMCOUNTER31, {"mhpmcounter31", MISCREG_HPMCOUNTER31}}, - {CSR_MHPMEVENT03, {"mhpmevent03", MISCREG_HPMEVENT03}}, - {CSR_MHPMEVENT04, {"mhpmevent04", MISCREG_HPMEVENT04}}, - {CSR_MHPMEVENT05, {"mhpmevent05", MISCREG_HPMEVENT05}}, - {CSR_MHPMEVENT06, {"mhpmevent06", MISCREG_HPMEVENT06}}, - {CSR_MHPMEVENT07, {"mhpmevent07", MISCREG_HPMEVENT07}}, - {CSR_MHPMEVENT08, {"mhpmevent08", MISCREG_HPMEVENT08}}, - {CSR_MHPMEVENT09, {"mhpmevent09", MISCREG_HPMEVENT09}}, - {CSR_MHPMEVENT10, {"mhpmevent10", MISCREG_HPMEVENT10}}, - {CSR_MHPMEVENT11, {"mhpmevent11", MISCREG_HPMEVENT11}}, - {CSR_MHPMEVENT12, {"mhpmevent12", MISCREG_HPMEVENT12}}, - {CSR_MHPMEVENT13, {"mhpmevent13", MISCREG_HPMEVENT13}}, - {CSR_MHPMEVENT14, {"mhpmevent14", MISCREG_HPMEVENT14}}, - {CSR_MHPMEVENT15, {"mhpmevent15", MISCREG_HPMEVENT15}}, - {CSR_MHPMEVENT16, {"mhpmevent16", MISCREG_HPMEVENT16}}, - {CSR_MHPMEVENT17, {"mhpmevent17", MISCREG_HPMEVENT17}}, - {CSR_MHPMEVENT18, {"mhpmevent18", MISCREG_HPMEVENT18}}, - {CSR_MHPMEVENT19, {"mhpmevent19", MISCREG_HPMEVENT19}}, - {CSR_MHPMEVENT20, {"mhpmevent20", MISCREG_HPMEVENT20}}, - {CSR_MHPMEVENT21, {"mhpmevent21", MISCREG_HPMEVENT21}}, - {CSR_MHPMEVENT22, {"mhpmevent22", MISCREG_HPMEVENT22}}, - {CSR_MHPMEVENT23, {"mhpmevent23", MISCREG_HPMEVENT23}}, - {CSR_MHPMEVENT24, {"mhpmevent24", MISCREG_HPMEVENT24}}, - {CSR_MHPMEVENT25, {"mhpmevent25", MISCREG_HPMEVENT25}}, - {CSR_MHPMEVENT26, {"mhpmevent26", MISCREG_HPMEVENT26}}, - {CSR_MHPMEVENT27, {"mhpmevent27", MISCREG_HPMEVENT27}}, - {CSR_MHPMEVENT28, {"mhpmevent28", MISCREG_HPMEVENT28}}, - {CSR_MHPMEVENT29, {"mhpmevent29", 
MISCREG_HPMEVENT29}}, - {CSR_MHPMEVENT30, {"mhpmevent30", MISCREG_HPMEVENT30}}, - {CSR_MHPMEVENT31, {"mhpmevent31", MISCREG_HPMEVENT31}}, + {CSR_MVENDORID, {"mvendorid", MISCREG_VENDORID, rvTypeFlags(RV64, RV32)}}, + {CSR_MARCHID, {"marchid", MISCREG_ARCHID, rvTypeFlags(RV64, RV32)}}, + {CSR_MIMPID, {"mimpid", MISCREG_IMPID, rvTypeFlags(RV64, RV32)}}, + {CSR_MHARTID, {"mhartid", MISCREG_HARTID, rvTypeFlags(RV64, RV32)}}, + {CSR_MSTATUS, {"mstatus", MISCREG_STATUS, rvTypeFlags(RV64, RV32)}}, + {CSR_MISA, {"misa", MISCREG_ISA, rvTypeFlags(RV64, RV32)}}, + {CSR_MEDELEG, {"medeleg", MISCREG_MEDELEG, rvTypeFlags(RV64, RV32)}}, + {CSR_MIDELEG, {"mideleg", MISCREG_MIDELEG, rvTypeFlags(RV64, RV32)}}, + {CSR_MIE, {"mie", MISCREG_IE, rvTypeFlags(RV64, RV32)}}, + {CSR_MTVEC, {"mtvec", MISCREG_MTVEC, rvTypeFlags(RV64, RV32)}}, + {CSR_MCOUNTEREN, {"mcounteren", MISCREG_MCOUNTEREN, rvTypeFlags(RV64, RV32)}}, + {CSR_MSTATUSH, {"mstatush", MISCREG_MSTATUSH, rvTypeFlags(RV32)}}, + {CSR_MSCRATCH, {"mscratch", MISCREG_MSCRATCH, rvTypeFlags(RV64, RV32)}}, + {CSR_MEPC, {"mepc", MISCREG_MEPC, rvTypeFlags(RV64, RV32)}}, + {CSR_MCAUSE, {"mcause", MISCREG_MCAUSE, rvTypeFlags(RV64, RV32)}}, + {CSR_MTVAL, {"mtval", MISCREG_MTVAL, rvTypeFlags(RV64, RV32)}}, + {CSR_MIP, {"mip", MISCREG_IP, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPCFG0, {"pmpcfg0", MISCREG_PMPCFG0, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPCFG1, {"pmpcfg1", MISCREG_PMPCFG1, rvTypeFlags(RV32)}}, // pmpcfg1 rv32 only + {CSR_PMPCFG2, {"pmpcfg2", MISCREG_PMPCFG2, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPCFG3, {"pmpcfg3", MISCREG_PMPCFG3, rvTypeFlags(RV32)}}, // pmpcfg3 rv32 only + {CSR_PMPADDR00, {"pmpaddr0", MISCREG_PMPADDR00, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR01, {"pmpaddr1", MISCREG_PMPADDR01, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR02, {"pmpaddr2", MISCREG_PMPADDR02, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR03, {"pmpaddr3", MISCREG_PMPADDR03, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR04, {"pmpaddr4", MISCREG_PMPADDR04, 
rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR05, {"pmpaddr5", MISCREG_PMPADDR05, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR06, {"pmpaddr6", MISCREG_PMPADDR06, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR07, {"pmpaddr7", MISCREG_PMPADDR07, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR08, {"pmpaddr8", MISCREG_PMPADDR08, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR09, {"pmpaddr9", MISCREG_PMPADDR09, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR10, {"pmpaddr10", MISCREG_PMPADDR10, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR11, {"pmpaddr11", MISCREG_PMPADDR11, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR12, {"pmpaddr12", MISCREG_PMPADDR12, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR13, {"pmpaddr13", MISCREG_PMPADDR13, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR14, {"pmpaddr14", MISCREG_PMPADDR14, rvTypeFlags(RV64, RV32)}}, + {CSR_PMPADDR15, {"pmpaddr15", MISCREG_PMPADDR15, rvTypeFlags(RV64, RV32)}}, + {CSR_MCYCLE, {"mcycle", MISCREG_CYCLE, rvTypeFlags(RV64, RV32)}}, + {CSR_MINSTRET, {"minstret", MISCREG_INSTRET, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER03, {"mhpmcounter03", MISCREG_HPMCOUNTER03, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER04, {"mhpmcounter04", MISCREG_HPMCOUNTER04, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER05, {"mhpmcounter05", MISCREG_HPMCOUNTER05, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER06, {"mhpmcounter06", MISCREG_HPMCOUNTER06, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER07, {"mhpmcounter07", MISCREG_HPMCOUNTER07, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER08, {"mhpmcounter08", MISCREG_HPMCOUNTER08, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER09, {"mhpmcounter09", MISCREG_HPMCOUNTER09, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER10, {"mhpmcounter10", MISCREG_HPMCOUNTER10, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER11, {"mhpmcounter11", MISCREG_HPMCOUNTER11, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER12, {"mhpmcounter12", MISCREG_HPMCOUNTER12, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER13, {"mhpmcounter13", MISCREG_HPMCOUNTER13, rvTypeFlags(RV64, 
RV32)}}, + {CSR_MHPMCOUNTER14, {"mhpmcounter14", MISCREG_HPMCOUNTER14, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER15, {"mhpmcounter15", MISCREG_HPMCOUNTER15, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER16, {"mhpmcounter16", MISCREG_HPMCOUNTER16, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER17, {"mhpmcounter17", MISCREG_HPMCOUNTER17, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER18, {"mhpmcounter18", MISCREG_HPMCOUNTER18, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER19, {"mhpmcounter19", MISCREG_HPMCOUNTER19, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER20, {"mhpmcounter20", MISCREG_HPMCOUNTER20, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER21, {"mhpmcounter21", MISCREG_HPMCOUNTER21, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER22, {"mhpmcounter22", MISCREG_HPMCOUNTER22, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER23, {"mhpmcounter23", MISCREG_HPMCOUNTER23, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER24, {"mhpmcounter24", MISCREG_HPMCOUNTER24, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER25, {"mhpmcounter25", MISCREG_HPMCOUNTER25, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER26, {"mhpmcounter26", MISCREG_HPMCOUNTER26, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER27, {"mhpmcounter27", MISCREG_HPMCOUNTER27, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER28, {"mhpmcounter28", MISCREG_HPMCOUNTER28, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER29, {"mhpmcounter29", MISCREG_HPMCOUNTER29, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER30, {"mhpmcounter30", MISCREG_HPMCOUNTER30, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMCOUNTER31, {"mhpmcounter31", MISCREG_HPMCOUNTER31, rvTypeFlags(RV64, RV32)}}, - {CSR_TSELECT, {"tselect", MISCREG_TSELECT}}, - {CSR_TDATA1, {"tdata1", MISCREG_TDATA1}}, - {CSR_TDATA2, {"tdata2", MISCREG_TDATA2}}, - {CSR_TDATA3, {"tdata3", MISCREG_TDATA3}}, - {CSR_DCSR, {"dcsr", MISCREG_DCSR}}, - {CSR_DPC, {"dpc", MISCREG_DPC}}, - {CSR_DSCRATCH, {"dscratch", MISCREG_DSCRATCH}} + {CSR_MCYCLEH, {"mcycleh", MISCREG_CYCLEH, rvTypeFlags(RV32)}}, + {CSR_MINSTRETH, 
{"minstreth", MISCREG_INSTRETH, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER03H, {"mhpmcounter03h", MISCREG_HPMCOUNTER03H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER04H, {"mhpmcounter04h", MISCREG_HPMCOUNTER04H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER05H, {"mhpmcounter05h", MISCREG_HPMCOUNTER05H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER06H, {"mhpmcounter06h", MISCREG_HPMCOUNTER06H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER07H, {"mhpmcounter07h", MISCREG_HPMCOUNTER07H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER08H, {"mhpmcounter08h", MISCREG_HPMCOUNTER08H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER09H, {"mhpmcounter09h", MISCREG_HPMCOUNTER09H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER10H, {"mhpmcounter10h", MISCREG_HPMCOUNTER10H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER11H, {"mhpmcounter11h", MISCREG_HPMCOUNTER11H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER12H, {"mhpmcounter12h", MISCREG_HPMCOUNTER12H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER13H, {"mhpmcounter13h", MISCREG_HPMCOUNTER13H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER14H, {"mhpmcounter14h", MISCREG_HPMCOUNTER14H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER15H, {"mhpmcounter15h", MISCREG_HPMCOUNTER15H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER16H, {"mhpmcounter16h", MISCREG_HPMCOUNTER16H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER17H, {"mhpmcounter17h", MISCREG_HPMCOUNTER17H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER18H, {"mhpmcounter18h", MISCREG_HPMCOUNTER18H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER19H, {"mhpmcounter19h", MISCREG_HPMCOUNTER19H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER20H, {"mhpmcounter20h", MISCREG_HPMCOUNTER20H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER21H, {"mhpmcounter21h", MISCREG_HPMCOUNTER21H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER22H, {"mhpmcounter22h", MISCREG_HPMCOUNTER22H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER23H, {"mhpmcounter23h", MISCREG_HPMCOUNTER23H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER24H, {"mhpmcounter24h", MISCREG_HPMCOUNTER24H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER25H, {"mhpmcounter25h", 
MISCREG_HPMCOUNTER25H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER26H, {"mhpmcounter26h", MISCREG_HPMCOUNTER26H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER27H, {"mhpmcounter27h", MISCREG_HPMCOUNTER27H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER28H, {"mhpmcounter28h", MISCREG_HPMCOUNTER28H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER29H, {"mhpmcounter29h", MISCREG_HPMCOUNTER29H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER30H, {"mhpmcounter30h", MISCREG_HPMCOUNTER30H, rvTypeFlags(RV32)}}, + {CSR_MHPMCOUNTER31H, {"mhpmcounter31h", MISCREG_HPMCOUNTER31H, rvTypeFlags(RV32)}}, + + {CSR_MHPMEVENT03, {"mhpmevent03", MISCREG_HPMEVENT03, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT04, {"mhpmevent04", MISCREG_HPMEVENT04, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT05, {"mhpmevent05", MISCREG_HPMEVENT05, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT06, {"mhpmevent06", MISCREG_HPMEVENT06, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT07, {"mhpmevent07", MISCREG_HPMEVENT07, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT08, {"mhpmevent08", MISCREG_HPMEVENT08, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT09, {"mhpmevent09", MISCREG_HPMEVENT09, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT10, {"mhpmevent10", MISCREG_HPMEVENT10, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT11, {"mhpmevent11", MISCREG_HPMEVENT11, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT12, {"mhpmevent12", MISCREG_HPMEVENT12, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT13, {"mhpmevent13", MISCREG_HPMEVENT13, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT14, {"mhpmevent14", MISCREG_HPMEVENT14, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT15, {"mhpmevent15", MISCREG_HPMEVENT15, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT16, {"mhpmevent16", MISCREG_HPMEVENT16, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT17, {"mhpmevent17", MISCREG_HPMEVENT17, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT18, {"mhpmevent18", MISCREG_HPMEVENT18, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT19, {"mhpmevent19", MISCREG_HPMEVENT19, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT20, 
{"mhpmevent20", MISCREG_HPMEVENT20, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT21, {"mhpmevent21", MISCREG_HPMEVENT21, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT22, {"mhpmevent22", MISCREG_HPMEVENT22, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT23, {"mhpmevent23", MISCREG_HPMEVENT23, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT24, {"mhpmevent24", MISCREG_HPMEVENT24, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT25, {"mhpmevent25", MISCREG_HPMEVENT25, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT26, {"mhpmevent26", MISCREG_HPMEVENT26, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT27, {"mhpmevent27", MISCREG_HPMEVENT27, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT28, {"mhpmevent28", MISCREG_HPMEVENT28, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT29, {"mhpmevent29", MISCREG_HPMEVENT29, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT30, {"mhpmevent30", MISCREG_HPMEVENT30, rvTypeFlags(RV64, RV32)}}, + {CSR_MHPMEVENT31, {"mhpmevent31", MISCREG_HPMEVENT31, rvTypeFlags(RV64, RV32)}}, + + {CSR_TSELECT, {"tselect", MISCREG_TSELECT, rvTypeFlags(RV64, RV32)}}, + {CSR_TDATA1, {"tdata1", MISCREG_TDATA1, rvTypeFlags(RV64, RV32)}}, + {CSR_TDATA2, {"tdata2", MISCREG_TDATA2, rvTypeFlags(RV64, RV32)}}, + {CSR_TDATA3, {"tdata3", MISCREG_TDATA3, rvTypeFlags(RV64, RV32)}}, + {CSR_DCSR, {"dcsr", MISCREG_DCSR, rvTypeFlags(RV64, RV32)}}, + {CSR_DPC, {"dpc", MISCREG_DPC, rvTypeFlags(RV64, RV32)}}, + {CSR_DSCRATCH, {"dscratch", MISCREG_DSCRATCH, rvTypeFlags(RV64, RV32)}} }; /** @@ -597,6 +774,14 @@ const off_t MXL_OFFSETS[enums::Num_RiscvType] = { [RV32] = (sizeof(uint32_t) * 8 - 2), [RV64] = (sizeof(uint64_t) * 8 - 2), }; +const off_t MBE_OFFSET[enums::Num_RiscvType] = { + [RV32] = 5, + [RV64] = 37, +}; +const off_t SBE_OFFSET[enums::Num_RiscvType] = { + [RV32] = 4, + [RV64] = 36, +}; const off_t SXL_OFFSET = 34; const off_t UXL_OFFSET = 32; const off_t FS_OFFSET = 13; @@ -618,9 +803,16 @@ const RegVal STATUS_SD_MASKS[enums::Num_RiscvType] = { [RV32] = 1ULL << ((sizeof(uint32_t) * 8) - 1), [RV64] = 1ULL 
<< ((sizeof(uint64_t) * 8) - 1), }; +const RegVal STATUS_MBE_MASK[enums::Num_RiscvType] = { + [RV32] = 1ULL << MBE_OFFSET[RV32], + [RV64] = 1ULL << MBE_OFFSET[RV64], +}; +const RegVal STATUS_SBE_MASK[enums::Num_RiscvType] = { + [RV32] = 1ULL << SBE_OFFSET[RV32], + [RV64] = 1ULL << SBE_OFFSET[RV64], +}; const RegVal STATUS_SXL_MASK = 3ULL << SXL_OFFSET; const RegVal STATUS_UXL_MASK = 3ULL << UXL_OFFSET; - const RegVal STATUS_TSR_MASK = 1ULL << 22; const RegVal STATUS_TW_MASK = 1ULL << 21; const RegVal STATUS_TVM_MASK = 1ULL << 20; @@ -645,7 +837,8 @@ const RegVal MSTATUS_MASKS[enums::Num_RiscvType] = { STATUS_VS_MASK | STATUS_MPP_MASK | STATUS_SPP_MASK | STATUS_MPIE_MASK | STATUS_SPIE_MASK | STATUS_UPIE_MASK | STATUS_MIE_MASK | STATUS_SIE_MASK | STATUS_UIE_MASK, - [RV64] = STATUS_SD_MASKS[RV64] | STATUS_SXL_MASK | STATUS_UXL_MASK | + [RV64] = STATUS_SD_MASKS[RV64] | STATUS_MBE_MASK[RV64] | + STATUS_SBE_MASK[RV64] | STATUS_SXL_MASK | STATUS_UXL_MASK | STATUS_TSR_MASK | STATUS_TW_MASK | STATUS_TVM_MASK | STATUS_MXR_MASK | STATUS_SUM_MASK | STATUS_MPRV_MASK | STATUS_XS_MASK | STATUS_FS_MASK | STATUS_VS_MASK| @@ -653,6 +846,8 @@ const RegVal MSTATUS_MASKS[enums::Num_RiscvType] = { STATUS_SPIE_MASK | STATUS_UPIE_MASK | STATUS_MIE_MASK | STATUS_SIE_MASK | STATUS_UIE_MASK, }; +// rv32 only +const RegVal MSTATUSH_MASKS = STATUS_MBE_MASK[RV32] | STATUS_SBE_MASK[RV32]; const RegVal SSTATUS_MASKS[enums::Num_RiscvType] = { [RV32] = STATUS_SD_MASKS[RV32] | STATUS_MXR_MASK | STATUS_SUM_MASK | STATUS_XS_MASK | STATUS_FS_MASK | STATUS_VS_MASK | @@ -709,6 +904,7 @@ const std::unordered_map CSRMasks[enums::Num_RiscvType] = { {CSR_MSTATUS, MSTATUS_MASKS[RV32]}, {CSR_MISA, MISA_MASKS[RV32]}, {CSR_MIE, MI_MASK}, + {CSR_MSTATUSH, MSTATUSH_MASKS}, {CSR_MIP, MI_MASK}}, [RV64] = {{CSR_USTATUS, USTATUS_MASKS[RV64]}, {CSR_UIE, UI_MASK}, diff --git a/src/dev/riscv/clint.cc b/src/dev/riscv/clint.cc index b27b9bf61d..209f656d3c 100644 --- a/src/dev/riscv/clint.cc +++ b/src/dev/riscv/clint.cc @@ 
-71,7 +71,12 @@ Clint::raiseInterruptPin(int id) // Update misc reg file ISA* isa = dynamic_cast(tc->getIsaPtr()); - isa->setMiscRegNoEffect(MISCREG_TIME, mtime); + if (isa->rvType() == RV32) { + isa->setMiscRegNoEffect(MISCREG_TIME, bits(mtime, 31, 0)); + isa->setMiscRegNoEffect(MISCREG_TIMEH, bits(mtime, 63, 32)); + } else { + isa->setMiscRegNoEffect(MISCREG_TIME, mtime); + } // Post timer interrupt uint64_t mtimecmp = registers.mtimecmp[context_id].get(); From ad107116a1d01aca2535e4c2f191278249177083 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Mon, 28 Nov 2022 08:28:12 +0000 Subject: [PATCH 054/492] arch-riscv: Support RV32 to remote gdb support rv32 cpu, fpu, csr registers to remote gdb. Change-Id: Ib821a35ff2e95f6f47569b1f4cb35cd98fcca77d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66131 Tested-by: kokoro Maintainer: Gabe Black Reviewed-by: Gabe Black Reviewed-by: Jason Lowe-Power --- src/arch/riscv/gdb-xml/SConscript | 4 + src/arch/riscv/gdb-xml/riscv-32bit-cpu.xml | 48 +++ src/arch/riscv/gdb-xml/riscv-32bit-csr.xml | 249 ++++++++++++++ src/arch/riscv/gdb-xml/riscv-32bit-fpu.xml | 58 ++++ src/arch/riscv/gdb-xml/riscv-32bit.xml | 13 + src/arch/riscv/remote_gdb.cc | 376 ++++++++++++++------- src/arch/riscv/remote_gdb.hh | 89 ++++- 7 files changed, 719 insertions(+), 118 deletions(-) create mode 100644 src/arch/riscv/gdb-xml/riscv-32bit-cpu.xml create mode 100644 src/arch/riscv/gdb-xml/riscv-32bit-csr.xml create mode 100644 src/arch/riscv/gdb-xml/riscv-32bit-fpu.xml create mode 100644 src/arch/riscv/gdb-xml/riscv-32bit.xml diff --git a/src/arch/riscv/gdb-xml/SConscript b/src/arch/riscv/gdb-xml/SConscript index 722137408b..bafea174d2 100644 --- a/src/arch/riscv/gdb-xml/SConscript +++ b/src/arch/riscv/gdb-xml/SConscript @@ -43,6 +43,10 @@ Import('*') +GdbXml('riscv-32bit.xml', 'gdb_xml_riscv_32bit_target', tags='riscv isa') +GdbXml('riscv-32bit-cpu.xml', 'gdb_xml_riscv_32bit_cpu', tags='riscv isa') +GdbXml('riscv-32bit-fpu.xml', 
'gdb_xml_riscv_32bit_fpu', tags='riscv isa') +GdbXml('riscv-32bit-csr.xml', 'gdb_xml_riscv_32bit_csr', tags='riscv isa') GdbXml('riscv-64bit.xml', 'gdb_xml_riscv_64bit_target', tags='riscv isa') GdbXml('riscv-64bit-cpu.xml', 'gdb_xml_riscv_64bit_cpu', tags='riscv isa') GdbXml('riscv-64bit-fpu.xml', 'gdb_xml_riscv_64bit_fpu', tags='riscv isa') diff --git a/src/arch/riscv/gdb-xml/riscv-32bit-cpu.xml b/src/arch/riscv/gdb-xml/riscv-32bit-cpu.xml new file mode 100644 index 0000000000..c48f770ded --- /dev/null +++ b/src/arch/riscv/gdb-xml/riscv-32bit-cpu.xml @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/arch/riscv/gdb-xml/riscv-32bit-csr.xml b/src/arch/riscv/gdb-xml/riscv-32bit-csr.xml new file mode 100644 index 0000000000..7cf7bc05b4 --- /dev/null +++ b/src/arch/riscv/gdb-xml/riscv-32bit-csr.xml @@ -0,0 +1,249 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/arch/riscv/gdb-xml/riscv-32bit-fpu.xml b/src/arch/riscv/gdb-xml/riscv-32bit-fpu.xml new file mode 100644 index 0000000000..9661b0e004 --- /dev/null +++ b/src/arch/riscv/gdb-xml/riscv-32bit-fpu.xml @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/arch/riscv/gdb-xml/riscv-32bit.xml b/src/arch/riscv/gdb-xml/riscv-32bit.xml new file mode 100644 index 0000000000..982e6b0674 --- /dev/null +++ b/src/arch/riscv/gdb-xml/riscv-32bit.xml @@ -0,0 +1,13 @@ + + + + + riscv + + + + diff --git a/src/arch/riscv/remote_gdb.cc b/src/arch/riscv/remote_gdb.cc index 4bdd88fde6..54ecde061f 100644 --- a/src/arch/riscv/remote_gdb.cc +++ b/src/arch/riscv/remote_gdb.cc @@ -135,6 +135,10 @@ #include +#include "arch/riscv/gdb-xml/gdb_xml_riscv_32bit_cpu.hh" +#include "arch/riscv/gdb-xml/gdb_xml_riscv_32bit_csr.hh" +#include "arch/riscv/gdb-xml/gdb_xml_riscv_32bit_fpu.hh" +#include 
"arch/riscv/gdb-xml/gdb_xml_riscv_32bit_target.hh" #include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_cpu.hh" #include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_csr.hh" #include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_fpu.hh" @@ -163,8 +167,31 @@ getRvType(ThreadContext* tc) return isa->rvType(); } +template +static void +setRegNoEffectWithMask( + ThreadContext *context, RiscvType type, CSRIndex idx, xint val) +{ + RegVal oldVal, newVal; + RegVal mask = CSRMasks[type].at(idx); + oldVal = context->readMiscRegNoEffect(CSRData.at(idx).physIndex); + newVal = (oldVal & ~mask) | (val & mask); + context->setMiscRegNoEffect(CSRData.at(idx).physIndex, newVal); +} + +template +static void +setRegWithMask(ThreadContext *context, RiscvType type, CSRIndex idx, xint val) +{ + RegVal oldVal, newVal; + RegVal mask = CSRMasks[type].at(idx); + oldVal = context->readMiscReg(CSRData.at(idx).physIndex); + newVal = (oldVal & ~mask) | (val & mask); + context->setMiscReg(CSRData.at(idx).physIndex, newVal); +} + RemoteGDB::RemoteGDB(System *_system, int _port) - : BaseRemoteGDB(_system, _port), regCache64(this) + : BaseRemoteGDB(_system, _port), regCache32(this), regCache64(this) { } @@ -193,6 +220,201 @@ RemoteGDB::acc(Addr va, size_t len) return context()->getProcessPtr()->pTable->lookup(va) != nullptr; } +void +RemoteGDB::Riscv32GdbRegCache::getRegs(ThreadContext *context) +{ + DPRINTF(GDBAcc, "getregs in remotegdb, size %lu\n", size()); + auto& RVxCSRMasks = CSRMasks[RV32]; + + // General registers + for (int i = 0; i < int_reg::NumArchRegs; i++) { + r.gpr[i] = context->getReg(intRegClass[i]); + } + r.pc = context->pcState().instAddr(); + + // Floating point registers + for (int i = 0; i < float_reg::NumRegs; i++) + r.fpu[i] = context->getReg(floatRegClass[i]); + r.fflags = context->readMiscRegNoEffect( + CSRData.at(CSR_FFLAGS).physIndex) & RVxCSRMasks.at(CSR_FFLAGS); + r.frm = context->readMiscRegNoEffect( + CSRData.at(CSR_FRM).physIndex) & RVxCSRMasks.at(CSR_FRM); + r.fcsr = 
context->readMiscRegNoEffect( + CSRData.at(CSR_FCSR).physIndex) & RVxCSRMasks.at(CSR_FCSR); + + // CSR registers + r.cycle = context->readMiscRegNoEffect( + CSRData.at(CSR_CYCLE).physIndex); + r.cycleh = context->readMiscRegNoEffect( + CSRData.at(CSR_CYCLEH).physIndex); + r.time = context->readMiscRegNoEffect( + CSRData.at(CSR_TIME).physIndex); + r.timeh = context->readMiscRegNoEffect( + CSRData.at(CSR_TIMEH).physIndex); + + // U mode CSR + r.ustatus = context->readMiscReg( + CSRData.at(CSR_USTATUS).physIndex) & RVxCSRMasks.at(CSR_USTATUS); + r.uie = context->readMiscReg( + CSRData.at(CSR_UIE).physIndex) & RVxCSRMasks.at(CSR_UIE); + r.utvec = context->readMiscRegNoEffect( + CSRData.at(CSR_UTVEC).physIndex); + r.uscratch = context->readMiscRegNoEffect( + CSRData.at(CSR_USCRATCH).physIndex); + r.uepc = context->readMiscRegNoEffect( + CSRData.at(CSR_UEPC).physIndex); + r.ucause = context->readMiscRegNoEffect( + CSRData.at(CSR_UCAUSE).physIndex); + r.utval = context->readMiscRegNoEffect( + CSRData.at(CSR_UTVAL).physIndex); + r.uip = context->readMiscReg( + CSRData.at(CSR_UIP).physIndex) & RVxCSRMasks.at(CSR_UIP); + + // S mode CSR + r.sstatus = context->readMiscReg( + CSRData.at(CSR_SSTATUS).physIndex) & RVxCSRMasks.at(CSR_SSTATUS); + r.sedeleg = context->readMiscRegNoEffect( + CSRData.at(CSR_SEDELEG).physIndex); + r.sideleg = context->readMiscRegNoEffect( + CSRData.at(CSR_SIDELEG).physIndex); + r.sie = context->readMiscReg( + CSRData.at(CSR_SIE).physIndex) & RVxCSRMasks.at(CSR_SIE); + r.stvec = context->readMiscRegNoEffect( + CSRData.at(CSR_STVEC).physIndex); + r.scounteren = context->readMiscRegNoEffect( + CSRData.at(CSR_SCOUNTEREN).physIndex); + r.sscratch = context->readMiscRegNoEffect( + CSRData.at(CSR_SSCRATCH).physIndex); + r.sepc = context->readMiscReg( + CSRData.at(CSR_SEPC).physIndex); + r.scause = context->readMiscRegNoEffect( + CSRData.at(CSR_SCAUSE).physIndex); + r.stval = context->readMiscRegNoEffect( + CSRData.at(CSR_STVAL).physIndex); + r.sip = 
context->readMiscReg( + CSRData.at(CSR_SIP).physIndex) & RVxCSRMasks.at(CSR_SIP); + r.satp = context->readMiscRegNoEffect( + CSRData.at(CSR_SATP).physIndex); + + // M mode CSR + r.mvendorid = context->readMiscRegNoEffect( + CSRData.at(CSR_MVENDORID).physIndex); + r.marchid = context->readMiscRegNoEffect( + CSRData.at(CSR_MARCHID).physIndex); + r.mimpid = context->readMiscRegNoEffect( + CSRData.at(CSR_MIMPID).physIndex); + r.mhartid = context->contextId(); + r.mstatus = context->readMiscReg( + CSRData.at(CSR_MSTATUS).physIndex) & RVxCSRMasks.at(CSR_MSTATUS); + r.misa = context->readMiscRegNoEffect( + CSRData.at(CSR_MISA).physIndex) & RVxCSRMasks.at(CSR_MISA); + r.medeleg = context->readMiscRegNoEffect( + CSRData.at(CSR_MEDELEG).physIndex); + r.mideleg = context->readMiscRegNoEffect( + CSRData.at(CSR_MIDELEG).physIndex); + r.mie = context->readMiscReg( + CSRData.at(CSR_MIE).physIndex) & RVxCSRMasks.at(CSR_MIE); + r.mtvec = context->readMiscRegNoEffect( + CSRData.at(CSR_MTVEC).physIndex); + r.mcounteren = context->readMiscRegNoEffect( + CSRData.at(CSR_MCOUNTEREN).physIndex); + r.mstatush = context->readMiscReg( + CSRData.at(CSR_MSTATUSH).physIndex) & RVxCSRMasks.at(CSR_MSTATUSH); + r.mscratch = context->readMiscRegNoEffect( + CSRData.at(CSR_MSCRATCH).physIndex); + r.mepc = context->readMiscReg( + CSRData.at(CSR_MEPC).physIndex); + r.mcause = context->readMiscRegNoEffect( + CSRData.at(CSR_MCAUSE).physIndex); + r.mtval = context->readMiscRegNoEffect( + CSRData.at(CSR_MTVAL).physIndex); + r.mip = context->readMiscReg( + CSRData.at(CSR_MIP).physIndex) & RVxCSRMasks.at(CSR_MIP); + + // H mode CSR (to be implemented) +} + +void +RemoteGDB::Riscv32GdbRegCache::setRegs(ThreadContext *context) const +{ + DPRINTF(GDBAcc, "setregs in remotegdb \n"); + for (int i = 0; i < int_reg::NumArchRegs; i++) + context->setReg(intRegClass[i], r.gpr[i]); + context->pcState(r.pc); + + // Floating point registers + for (int i = 0; i < float_reg::NumRegs; i++) + 
context->setReg(floatRegClass[i], r.fpu[i]); + + setRegNoEffectWithMask(context, RV32, CSR_FFLAGS, r.fflags); + setRegNoEffectWithMask(context, RV32, CSR_FRM, r.frm); + setRegNoEffectWithMask(context, RV32, CSR_FCSR, r.fcsr); + + // TODO: implement CSR counter registers for mcycle(h), minstret(h) + + // U mode CSR + setRegNoEffectWithMask(context, RV32, CSR_USTATUS, r.ustatus); + setRegWithMask(context, RV32, CSR_UIE, r.uie); + setRegWithMask(context, RV32, CSR_UIP, r.uip); + context->setMiscRegNoEffect( + CSRData.at(CSR_UTVEC).physIndex, r.utvec); + context->setMiscRegNoEffect( + CSRData.at(CSR_USCRATCH).physIndex, r.uscratch); + context->setMiscRegNoEffect( + CSRData.at(CSR_UEPC).physIndex, r.uepc); + context->setMiscRegNoEffect( + CSRData.at(CSR_UCAUSE).physIndex, r.ucause); + context->setMiscRegNoEffect( + CSRData.at(CSR_UTVAL).physIndex, r.utval); + + // S mode CSR + setRegNoEffectWithMask(context, RV32, CSR_SSTATUS, r.sstatus); + setRegWithMask(context, RV32, CSR_SIE, r.sie); + setRegWithMask(context, RV32, CSR_SIP, r.sip); + context->setMiscRegNoEffect( + CSRData.at(CSR_SEDELEG).physIndex, r.sedeleg); + context->setMiscRegNoEffect( + CSRData.at(CSR_SIDELEG).physIndex, r.sideleg); + context->setMiscRegNoEffect( + CSRData.at(CSR_STVEC).physIndex, r.stvec); + context->setMiscRegNoEffect( + CSRData.at(CSR_SCOUNTEREN).physIndex, r.scounteren); + context->setMiscRegNoEffect( + CSRData.at(CSR_SSCRATCH).physIndex, r.sscratch); + context->setMiscRegNoEffect( + CSRData.at(CSR_SEPC).physIndex, r.sepc); + context->setMiscRegNoEffect( + CSRData.at(CSR_SCAUSE).physIndex, r.scause); + context->setMiscRegNoEffect( + CSRData.at(CSR_STVAL).physIndex, r.stval); + context->setMiscRegNoEffect( + CSRData.at(CSR_SATP).physIndex, r.satp); + + // M mode CSR + setRegNoEffectWithMask(context, RV32, CSR_MSTATUS, r.mstatus); + setRegNoEffectWithMask(context, RV32, CSR_MISA, r.misa); + setRegWithMask(context, RV32, CSR_MIE, r.mie); + setRegWithMask(context, RV32, CSR_MIP, r.mip); + 
context->setMiscRegNoEffect( + CSRData.at(CSR_MEDELEG).physIndex, r.medeleg); + context->setMiscRegNoEffect( + CSRData.at(CSR_MIDELEG).physIndex, r.mideleg); + context->setMiscRegNoEffect( + CSRData.at(CSR_MTVEC).physIndex, r.mtvec); + context->setMiscRegNoEffect( + CSRData.at(CSR_MCOUNTEREN).physIndex, r.mcounteren); + context->setMiscRegNoEffect( + CSRData.at(CSR_MSCRATCH).physIndex, r.mscratch); + context->setMiscRegNoEffect( + CSRData.at(CSR_MEPC).physIndex, r.mepc); + context->setMiscRegNoEffect( + CSRData.at(CSR_MCAUSE).physIndex, r.mcause); + context->setMiscRegNoEffect( + CSRData.at(CSR_MTVAL).physIndex, r.mtval); + + // H mode CSR (to be implemented) +} + void RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context) { @@ -222,7 +444,7 @@ RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context) CSRData.at(CSR_TIME).physIndex); // U mode CSR - r.ustatus = context->readMiscRegNoEffect( + r.ustatus = context->readMiscReg( CSRData.at(CSR_USTATUS).physIndex) & RVxCSRMasks.at(CSR_USTATUS); r.uie = context->readMiscReg( CSRData.at(CSR_UIE).physIndex) & RVxCSRMasks.at(CSR_UIE); @@ -240,7 +462,7 @@ RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context) CSRData.at(CSR_UIP).physIndex) & RVxCSRMasks.at(CSR_UIP); // S mode CSR - r.sstatus = context->readMiscRegNoEffect( + r.sstatus = context->readMiscReg( CSRData.at(CSR_SSTATUS).physIndex) & RVxCSRMasks.at(CSR_SSTATUS); r.sedeleg = context->readMiscRegNoEffect( CSRData.at(CSR_SEDELEG).physIndex); @@ -254,7 +476,7 @@ RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context) CSRData.at(CSR_SCOUNTEREN).physIndex); r.sscratch = context->readMiscRegNoEffect( CSRData.at(CSR_SSCRATCH).physIndex); - r.sepc = context->readMiscRegNoEffect( + r.sepc = context->readMiscReg( CSRData.at(CSR_SEPC).physIndex); r.scause = context->readMiscRegNoEffect( CSRData.at(CSR_SCAUSE).physIndex); @@ -272,9 +494,8 @@ RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context) CSRData.at(CSR_MARCHID).physIndex); r.mimpid 
= context->readMiscRegNoEffect( CSRData.at(CSR_MIMPID).physIndex); - r.mhartid = context->readMiscRegNoEffect( - CSRData.at(CSR_MHARTID).physIndex); - r.mstatus = context->readMiscRegNoEffect( + r.mhartid = context->contextId(); + r.mstatus = context->readMiscReg( CSRData.at(CSR_MSTATUS).physIndex) & RVxCSRMasks.at(CSR_MSTATUS); r.misa = context->readMiscRegNoEffect( CSRData.at(CSR_MISA).physIndex) & RVxCSRMasks.at(CSR_MISA); @@ -290,7 +511,7 @@ RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context) CSRData.at(CSR_MCOUNTEREN).physIndex); r.mscratch = context->readMiscRegNoEffect( CSRData.at(CSR_MSCRATCH).physIndex); - r.mepc = context->readMiscRegNoEffect( + r.mepc = context->readMiscReg( CSRData.at(CSR_MEPC).physIndex); r.mcause = context->readMiscRegNoEffect( CSRData.at(CSR_MCAUSE).physIndex); @@ -305,11 +526,6 @@ RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context) void RemoteGDB::Riscv64GdbRegCache::setRegs(ThreadContext *context) const { - // NOTE: no error will be reported for attempting to set masked bits. 
- RegVal oldVal; - int mask; - RegVal newVal; - DPRINTF(GDBAcc, "setregs in remotegdb \n"); for (int i = 0; i < int_reg::NumArchRegs; i++) context->setReg(intRegClass[i], r.gpr[i]); @@ -319,48 +535,16 @@ RemoteGDB::Riscv64GdbRegCache::setRegs(ThreadContext *context) const for (int i = 0; i < float_reg::NumRegs; i++) context->setReg(floatRegClass[i], r.fpu[i]); - auto& RVxCSRMasks = CSRMasks[RV64]; + setRegNoEffectWithMask(context, RV64, CSR_FFLAGS, r.fflags); + setRegNoEffectWithMask(context, RV64, CSR_FRM, r.frm); + setRegNoEffectWithMask(context, RV64, CSR_FCSR, r.fcsr); - oldVal = context->readMiscRegNoEffect( - CSRData.at(CSR_FFLAGS).physIndex); - mask = RVxCSRMasks.at(CSR_FFLAGS); - newVal = (oldVal & ~mask) | (r.fflags & mask); - context->setMiscRegNoEffect( - CSRData.at(CSR_FFLAGS).physIndex, newVal); - - oldVal = context->readMiscRegNoEffect( - CSRData.at(CSR_FRM).physIndex); - mask = RVxCSRMasks.at(CSR_FRM); - newVal = (oldVal & ~mask) | (r.frm & mask); - context->setMiscRegNoEffect( - CSRData.at(CSR_FRM).physIndex, newVal); - - oldVal = context->readMiscRegNoEffect( - CSRData.at(CSR_FCSR).physIndex); - mask = RVxCSRMasks.at(CSR_FCSR); - newVal = (oldVal & ~mask) | (r.fcsr & mask); - context->setMiscRegNoEffect( - CSRData.at(CSR_FCSR).physIndex, newVal); - - // CSR registers - context->setMiscRegNoEffect( - CSRData.at(CSR_CYCLE).physIndex, r.cycle); - context->setMiscRegNoEffect( - CSRData.at(CSR_TIME).physIndex, r.time); + // TODO: implement CSR counter registers for mcycle, minstret // U mode CSR - oldVal = context->readMiscRegNoEffect( - CSRData.at(CSR_USTATUS).physIndex); - mask = RVxCSRMasks.at(CSR_USTATUS); - newVal = (oldVal & ~mask) | (r.ustatus & mask); - context->setMiscRegNoEffect( - CSRData.at(CSR_USTATUS).physIndex, newVal); - oldVal = context->readMiscReg( - CSRData.at(CSR_UIE).physIndex); - mask = RVxCSRMasks.at(CSR_UIE); - newVal = (oldVal & ~mask) | (r.uie & mask); - context->setMiscReg( - CSRData.at(CSR_UIE).physIndex, newVal); + 
setRegNoEffectWithMask(context, RV64, CSR_USTATUS, r.ustatus); + setRegWithMask(context, RV64, CSR_UIE, r.uie); + setRegWithMask(context, RV64, CSR_UIP, r.uip); context->setMiscRegNoEffect( CSRData.at(CSR_UTVEC).physIndex, r.utvec); context->setMiscRegNoEffect( @@ -371,30 +555,15 @@ RemoteGDB::Riscv64GdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_UCAUSE).physIndex, r.ucause); context->setMiscRegNoEffect( CSRData.at(CSR_UTVAL).physIndex, r.utval); - oldVal = context->readMiscReg( - CSRData.at(CSR_UIP).physIndex); - mask = RVxCSRMasks.at(CSR_UIP); - newVal = (oldVal & ~mask) | (r.uip & mask); - context->setMiscReg( - CSRData.at(CSR_UIP).physIndex, newVal); // S mode CSR - oldVal = context->readMiscRegNoEffect( - CSRData.at(CSR_SSTATUS).physIndex); - mask = RVxCSRMasks.at(CSR_SSTATUS); - newVal = (oldVal & ~mask) | (r.sstatus & mask); - context->setMiscRegNoEffect( - CSRData.at(CSR_SSTATUS).physIndex, newVal); + setRegNoEffectWithMask(context, RV64, CSR_SSTATUS, r.sstatus); + setRegWithMask(context, RV64, CSR_SIE, r.sie); + setRegWithMask(context, RV64, CSR_SIP, r.sip); context->setMiscRegNoEffect( CSRData.at(CSR_SEDELEG).physIndex, r.sedeleg); context->setMiscRegNoEffect( CSRData.at(CSR_SIDELEG).physIndex, r.sideleg); - oldVal = context->readMiscReg( - CSRData.at(CSR_SIE).physIndex); - mask = RVxCSRMasks.at(CSR_SIE); - newVal = (oldVal & ~mask) | (r.sie & mask); - context->setMiscReg( - CSRData.at(CSR_SIE).physIndex, newVal); context->setMiscRegNoEffect( CSRData.at(CSR_STVEC).physIndex, r.stvec); context->setMiscRegNoEffect( @@ -407,46 +576,18 @@ RemoteGDB::Riscv64GdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_SCAUSE).physIndex, r.scause); context->setMiscRegNoEffect( CSRData.at(CSR_STVAL).physIndex, r.stval); - oldVal = context->readMiscReg( - CSRData.at(CSR_SIP).physIndex); - mask = RVxCSRMasks.at(CSR_SIP); - newVal = (oldVal & ~mask) | (r.sip & mask); - context->setMiscReg( - CSRData.at(CSR_SIP).physIndex, newVal); 
context->setMiscRegNoEffect( CSRData.at(CSR_SATP).physIndex, r.satp); // M mode CSR - context->setMiscRegNoEffect( - CSRData.at(CSR_MVENDORID).physIndex, r.mvendorid); - context->setMiscRegNoEffect( - CSRData.at(CSR_MARCHID).physIndex, r.marchid); - context->setMiscRegNoEffect( - CSRData.at(CSR_MIMPID).physIndex, r.mimpid); - context->setMiscRegNoEffect( - CSRData.at(CSR_MHARTID).physIndex, r.mhartid); - oldVal = context->readMiscRegNoEffect( - CSRData.at(CSR_MSTATUS).physIndex); - mask = RVxCSRMasks.at(CSR_MSTATUS); - newVal = (oldVal & ~mask) | (r.mstatus & mask); - context->setMiscRegNoEffect( - CSRData.at(CSR_MSTATUS).physIndex, newVal); - oldVal = context->readMiscRegNoEffect( - CSRData.at(CSR_MISA).physIndex); - mask = RVxCSRMasks.at(CSR_MISA); - newVal = (oldVal & ~mask) | (r.misa & mask); - context->setMiscRegNoEffect( - CSRData.at(CSR_MISA).physIndex, newVal); + setRegNoEffectWithMask(context, RV64, CSR_MSTATUS, r.mstatus); + setRegNoEffectWithMask(context, RV64, CSR_MISA, r.misa); + setRegWithMask(context, RV64, CSR_MIE, r.mie); + setRegWithMask(context, RV64, CSR_MIP, r.mip); context->setMiscRegNoEffect( CSRData.at(CSR_MEDELEG).physIndex, r.medeleg); context->setMiscRegNoEffect( CSRData.at(CSR_MIDELEG).physIndex, r.mideleg); - oldVal = context->readMiscReg( - CSRData.at(CSR_MIE).physIndex); - mask = RVxCSRMasks.at(CSR_MIE); - newVal = (oldVal & ~mask) | (r.mie & mask); - context->setMiscReg( - CSRData.at(CSR_MIE).physIndex, newVal); context->setMiscRegNoEffect( CSRData.at(CSR_MTVEC).physIndex, r.mtvec); context->setMiscRegNoEffect( @@ -459,12 +600,6 @@ RemoteGDB::Riscv64GdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_MCAUSE).physIndex, r.mcause); context->setMiscRegNoEffect( CSRData.at(CSR_MTVAL).physIndex, r.mtval); - oldVal = context->readMiscReg( - CSRData.at(CSR_MIP).physIndex); - mask = RVxCSRMasks.at(CSR_MIP); - newVal = (oldVal & ~mask) | (r.mip & mask); - context->setMiscReg( - CSRData.at(CSR_MIP).physIndex, newVal); // H mode 
CSR (to be implemented) } @@ -483,14 +618,17 @@ RemoteGDB::getXferFeaturesRead(const std::string &annex, std::string &output) x, std::string(reinterpret_cast(Blobs::s), \ Blobs::s##_len) \ } - static const std::map annexMap{ - GDB_XML("riscv-64bit.xml", gdb_xml_riscv_64bit_target), - GDB_XML("riscv-64bit-cpu.xml", gdb_xml_riscv_64bit_cpu), - GDB_XML("riscv-64bit-fpu.xml", gdb_xml_riscv_64bit_fpu), - GDB_XML("riscv-64bit-csr.xml", gdb_xml_riscv_64bit_csr)}; -#undef GDB_XML - if (getRvType(context()) == RV32) - return false; + static const std::map annexMaps[enums::Num_RiscvType] = { + [RV32] = {GDB_XML("target.xml", gdb_xml_riscv_32bit_target), + GDB_XML("riscv-32bit-cpu.xml", gdb_xml_riscv_32bit_cpu), + GDB_XML("riscv-32bit-fpu.xml", gdb_xml_riscv_32bit_fpu), + GDB_XML("riscv-32bit-csr.xml", gdb_xml_riscv_32bit_csr)}, + [RV64] = {GDB_XML("target.xml", gdb_xml_riscv_64bit_target), + GDB_XML("riscv-64bit-cpu.xml", gdb_xml_riscv_64bit_cpu), + GDB_XML("riscv-64bit-fpu.xml", gdb_xml_riscv_64bit_fpu), + GDB_XML("riscv-64bit-csr.xml", gdb_xml_riscv_64bit_csr)}, + }; + auto& annexMap = annexMaps[getRvType(context())]; auto it = annexMap.find(annex); if (it == annexMap.end()) return false; @@ -501,7 +639,11 @@ RemoteGDB::getXferFeaturesRead(const std::string &annex, std::string &output) BaseGdbRegCache * RemoteGDB::gdbRegs() { - return ®Cache64; + BaseGdbRegCache* regs[enums::Num_RiscvType] = { + [RV32] = ®Cache32, + [RV64] = ®Cache64, + }; + return regs[getRvType(context())]; } } // namespace gem5 diff --git a/src/arch/riscv/remote_gdb.hh b/src/arch/riscv/remote_gdb.hh index a8262a6a2a..8f8abb0587 100644 --- a/src/arch/riscv/remote_gdb.hh +++ b/src/arch/riscv/remote_gdb.hh @@ -58,6 +58,92 @@ class RemoteGDB : public BaseRemoteGDB // A breakpoint will be 2 bytes if it is compressed and 4 if not bool checkBpKind(size_t kind) override { return kind == 2 || kind == 4; } + class Riscv32GdbRegCache : public BaseGdbRegCache + { + using BaseGdbRegCache::BaseGdbRegCache; + private: 
+ /** + * RISC-V Register Cache + * Order and sizes of registers found in ext/gdb-xml/riscv.xml + * To add support for more CSRs: + * 1. Uncomment relevant lines in ext/gdb-xml/riscv-32bit-csr.xml + * 2. Add register to struct below + * 3. Modify RiscvGdbRegCache::getRegs and setRegs + */ + struct GEM5_PACKED + { + uint32_t gpr[int_reg::NumArchRegs]; + uint32_t pc; + uint64_t fpu[float_reg::NumRegs]; + uint32_t fflags; + uint32_t frm; + uint32_t fcsr; + // Placeholder for byte alignment + uint32_t placeholder; + uint32_t cycle; + uint32_t time; + uint32_t cycleh; + uint32_t timeh; + uint32_t ustatus; + uint32_t uie; + uint32_t utvec; + uint32_t uscratch; + uint32_t uepc; + uint32_t ucause; + uint32_t utval; + uint32_t uip; + uint32_t sstatus; + uint32_t sedeleg; + uint32_t sideleg; + uint32_t sie; + uint32_t stvec; + uint32_t scounteren; + uint32_t sscratch; + uint32_t sepc; + uint32_t scause; + uint32_t stval; + uint32_t sip; + uint32_t satp; + uint32_t mvendorid; + uint32_t marchid; + uint32_t mimpid; + uint32_t mhartid; + uint32_t mstatus; + uint32_t misa; + uint32_t medeleg; + uint32_t mideleg; + uint32_t mie; + uint32_t mtvec; + uint32_t mcounteren; + uint32_t mstatush; + uint32_t mscratch; + uint32_t mepc; + uint32_t mcause; + uint32_t mtval; + uint32_t mip; + uint32_t hstatus; + uint32_t hedeleg; + uint32_t hideleg; + uint32_t hie; + uint32_t htvec; + uint32_t hscratch; + uint32_t hepc; + uint32_t hcause; + uint32_t hbadaddr; + uint32_t hip; + } r; + public: + char *data() const { return (char *)&r; } + size_t size() const { return sizeof(r); } + void getRegs(ThreadContext*); + void setRegs(ThreadContext*) const; + + const std::string + name() const + { + return gdb->name() + ".RiscvGdbRegCache"; + } + }; class Riscv64GdbRegCache : public BaseGdbRegCache { using BaseGdbRegCache::BaseGdbRegCache; @@ -70,7 +156,7 @@ class RemoteGDB : public BaseRemoteGDB * 2. Add register to struct below * 3. 
Modify RiscvGdbRegCache::getRegs and setRegs */ - struct + struct GEM5_PACKED { uint64_t gpr[int_reg::NumArchRegs]; uint64_t pc; @@ -142,6 +228,7 @@ class RemoteGDB : public BaseRemoteGDB } }; + Riscv32GdbRegCache regCache32; Riscv64GdbRegCache regCache64; public: From 2ed43238994e3fcb938c238f1b132385abfb4d7e Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Mon, 12 Dec 2022 13:52:22 -0800 Subject: [PATCH 055/492] tests: Fix compiler-tests.sh for no build args passed case When a user ran "tests/compiler-tests.sh" without passing any arguments, the compiler tests would fail with: ``` scons: Reading SConscript files ... Error: No existing build directory and no variant for /gem5 ``` However, when passed with arguments, such as: ``` ./tests/compiler-tests.sh -j6 ``` the tests passed. The fix for this is to merge the "$build_out" and "$build_args" into a single string when executing the docker. I do not know exactly why this works, but it does fix the error. Change-Id: Ibcd316668b60fb7706f0ee05ab6dadf56228319d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66631 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- tests/compiler-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/compiler-tests.sh b/tests/compiler-tests.sh index e01d9de347..f16e8e5fdb 100755 --- a/tests/compiler-tests.sh +++ b/tests/compiler-tests.sh @@ -135,7 +135,7 @@ for compiler in ${images[@]}; do docker run --rm -v "${gem5_root}":"/gem5" -u $UID:$GID \ -w /gem5 --memory="${docker_mem_limit}" $repo_name \ /usr/bin/env python3 /usr/bin/scons --ignore-style \ - "${build_out}" "${build_args}" + "${build_out} ${build_args}" }>"${build_stdout}" 2>"${build_stderr}" result=$? 
From f7d0808a5c87a7ca3349bc3e68bb33a4fe912d3c Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Wed, 30 Nov 2022 16:36:05 +0800 Subject: [PATCH 056/492] arch-riscv: Fork Zba, Zbb, Zbc, Zbs instructions into rv32 / rv64 The following instructions will be supported for both rv32 and rv64 Zba extensions: SLLI.UW SH1ADD SH2ADD SH3ADD ADD.UW SH1ADD.UW SH2ADD.UW SH3ADD.UW Zbb extensions: CLZ CTZ CPOP SEXT.B SEXT.H ORC.B RORI REV8 CLZW CTZW CPOPW RORIW ROL MIN XNOR MINU ROR MAX ORN MAXU ANDN ROLW ZEXT.H RORW Zbc extensions: CLMUL CLMULR CLMULH Zbs extensions: BSETI BCLRI BINVI BEXTI BSET BCLR BINV BEXT Change-Id: I3f489a3a1bab8799e2d95218740e495313b9961d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66211 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/arch/riscv/isa/bitfields.isa | 1 + src/arch/riscv/isa/decoder.isa | 320 +++++++++++++++++-------------- 2 files changed, 174 insertions(+), 147 deletions(-) diff --git a/src/arch/riscv/isa/bitfields.isa b/src/arch/riscv/isa/bitfields.isa index 863982cfec..4f58416237 100644 --- a/src/arch/riscv/isa/bitfields.isa +++ b/src/arch/riscv/isa/bitfields.isa @@ -129,6 +129,7 @@ def bitfield M5FUNC <31:25>; // Cryptography instructions def bitfield BIT24 <24>; +def bitfield BIT25 <25>; def bitfield RNUM <23:20>; def bitfield KFUNCT5 <29:25>; def bitfield BS <31:30>; diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 885794032a..458327e5ec 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -577,8 +577,12 @@ decode QUADRANT default Unknown::unknown() { }}); } 0x05: bseti({{ - uint64_t index = imm & (64 - 1); - Rd = Rs1 | (UINT64_C(1) << index); + if (rvSelect((bool)SHAMT6BIT5, false)) { + return std::make_shared( + "shmat[5] != 0", machInst); + } + uint64_t index = imm & rvSelect(32 - 1, 64 - 1); + Rd = rvSext(Rs1 | (UINT64_C(1) << index)); }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); 0x06: decode BIT24 
{ 0x0: aes64im({{ @@ -589,24 +593,32 @@ decode QUADRANT default Unknown::unknown() { }}, imm_type = int32_t, imm_code={{ imm = RNUM; }}); } 0x09: bclri({{ - uint64_t index = imm & (64 - 1); - Rd = Rs1 & (~(UINT64_C(1) << index)); + if (rvSelect((bool)SHAMT6BIT5, false)) { + return std::make_shared( + "shmat[5] != 0", machInst); + } + uint64_t index = imm & rvSelect(32 - 1, 64 - 1); + Rd = rvSext(Rs1 & (~(UINT64_C(1) << index))); }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); 0x0d: binvi({{ - uint64_t index = imm & (64 - 1); - Rd = Rs1 ^ (UINT64_C(1) << index); + if (rvSelect((bool)SHAMT6BIT5, false)) { + return std::make_shared( + "shmat[5] != 0", machInst); + } + uint64_t index = imm & rvSelect(32 - 1, 64 - 1); + Rd = rvSext(Rs1 ^ (UINT64_C(1) << index)); }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); } format ROp { 0x0c: decode RS2 { 0x00: clz({{ - Rd = clz64(Rs1); + Rd = (machInst.rv_type == RV32) ? clz32(Rs1) : clz64(Rs1); }}); 0x01: ctz({{ - Rd = ctz64(Rs1); + Rd = (machInst.rv_type == RV32) ? ctz32(Rs1) : ctz64(Rs1); }}); 0x02: cpop({{ - Rd = popCount(Rs1); + Rd = (machInst.rv_type == RV32) ? popCount(Rs1<31:0>) : popCount(Rs1); }}); 0x04: sext_b({{ Rd = sext<8>(Rs1_ub); @@ -649,7 +661,7 @@ decode QUADRANT default Unknown::unknown() { result |= (Rs1<47:40> ? UINT64_C(0xff) : 0x0) << 40; result |= (Rs1<55:48> ? UINT64_C(0xff) : 0x0) << 48; result |= (Rs1<63:56> ? 
UINT64_C(0xff) : 0x0) << 56; - Rd = result; + Rd = rvSext(result); }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); 0x8: srai({{ if (rvSelect((bool)SHAMT6BIT5, false)) { @@ -659,30 +671,53 @@ decode QUADRANT default Unknown::unknown() { Rd_sd = rvSext(Rs1_sd) >> imm; }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); 0x9: bexti({{ - uint64_t index = imm & (64 - 1); + if (rvSelect((bool)SHAMT6BIT5, false)) { + return std::make_shared( + "shmat[5] != 0", machInst); + } + uint64_t index = imm & rvSelect(32 - 1, 64 - 1); Rd = (Rs1 >> index) & 0x1; }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); 0xc: rori({{ - Rd = (Rs1 >> imm) | (Rs1 << ((64 - imm) & (64 - 1))); + if (rvSelect((bool)SHAMT6BIT5, false)) { + return std::make_shared( + "shmat[5] != 0", machInst); + } + uint64_t xlen = rvSelect(32, 64); + Rd = rvSext((rvZext(Rs1) >> imm) + | (Rs1 << ((xlen - imm) & (xlen - 1)))); }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); 0xd: decode RS2 { - 0x18: rev8({{ - uint64_t result = 0; - result |= - ((Rs1 & 0xffULL) << 56) - | (((Rs1 >> 56) & 0xffULL)); - result |= - (((Rs1 >> 8) & 0xffULL) << 48) - | (((Rs1 >> 48) & 0xffULL) << 8); - result |= - (((Rs1 >> 16) & 0xffULL) << 40) - | (((Rs1 >> 40) & 0xffULL) << 16); - result |= - (((Rs1 >> 24) & 0xffULL) << 32) - | (((Rs1 >> 32) & 0xffULL) << 24); - Rd = result; - }}, - imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); + 0x18: decode BIT25 { + 0x0: rv32_rev8({{ + uint32_t result = 0; + result |= + ((Rs1_uw & 0xffUL) << 24) + | (((Rs1_uw >> 24) & 0xffUL)); + result |= + (((Rs1_uw >> 8) & 0xffUL) << 16) + | (((Rs1_uw >> 16) & 0xffUL) << 8); + Rd = rvSext(result); + }}, + imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); + 0x1: rev8({{ + uint64_t result = 0; + result |= + ((Rs1 & 0xffULL) << 56) + | (((Rs1 >> 56) & 0xffULL)); + result |= + (((Rs1 >> 8) & 0xffULL) << 48) + | (((Rs1 >> 48) & 0xffULL) << 8); + result |= + (((Rs1 >> 16) & 0xffULL) << 40) + | (((Rs1 >> 40) & 0xffULL) << 16); 
+ result |= + (((Rs1 >> 24) & 0xffULL) << 32) + | (((Rs1 >> 32) & 0xffULL) << 24); + Rd = result; + }}, + imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); + } } } 0x6: ori({{ @@ -698,48 +733,42 @@ decode QUADRANT default Unknown::unknown() { Rd = rvSext(PC + (sext<20>(imm) << 12)); }}); - 0x06: decode FUNCT3 { - format IOp { - 0x0: decode RVTYPE { - 0x1: addiw({{ + 0x06: decode RVTYPE { + 0x1: decode FUNCT3 { + format IOp { + 0x0: addiw({{ Rd_sw = (int32_t)(Rs1_sw + imm); }}, int32_t); - } - 0x1: decode FS3 { - 0x0: decode RVTYPE { - 0x1: slliw({{ + 0x1: decode FS3 { + 0x0: slliw({{ Rd_sd = Rs1_sw << imm; }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); + 0x1: slli_uw({{ + Rd = ((uint64_t)(Rs1_uw)) << imm; + }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }}); + 0xc: decode FS2 { + 0x0: clzw({{ + Rd = clz32(Rs1); + }}); + 0x1: ctzw({{ + Rd = ctz32(Rs1); + }}); + 0x2: cpopw({{ + Rd = popCount(Rs1<31:0>); + }}); + } } - 0x1: slli_uw({{ - Rd = ((uint64_t)(Rs1_uw)) << imm; - }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); - 0xc: decode FS2 { - 0x0: clzw({{ - Rd = clz32(Rs1); - }}); - 0x1: ctzw({{ - Rd = ctz32(Rs1); - }}); - 0x2: cpopw({{ - Rd = popCount(Rs1<31:0>); - }}); - } - } - 0x5: decode FS3 { - 0x0: decode RVTYPE { - 0x1: srliw({{ + 0x5: decode FS3 { + 0x0: srliw({{ Rd_sd = (int32_t)(Rs1_uw >> imm); }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); - } - 0x8: decode RVTYPE { - 0x1: sraiw({{ + 0x8: sraiw({{ Rd_sd = Rs1_sw >> imm; }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); + 0xc: roriw({{ + Rd = (int32_t) ((Rs1_uw >> imm) | (Rs1_uw << ((32 - imm) & (32 - 1)))); + }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); } - 0xc: roriw({{ - Rd = (int32_t) ((Rs1_uw >> imm) | (Rs1_uw << ((32 - imm) & (32 - 1)))); - }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }}); } } } @@ -1026,28 +1055,30 @@ decode QUADRANT default Unknown::unknown() { } 0x5: clmul({{ uint64_t result = 0; - for (int i = 0; i < 64; i++) { + 
for (int i = 0; i < rvSelect(32, 64); i++) { if ((Rs2 >> i) & 1) { result ^= Rs1 << i; } } - Rd = result; + Rd = rvSext(result); }}); 0x14: bset({{ - Rs2 &= (64 - 1); - Rd = Rs1 | (UINT64_C(1) << Rs2); + Rs2 &= rvSelect(32 - 1, 64 - 1); + Rd = rvSext(Rs1 | (UINT64_C(1) << Rs2)); }}); 0x24: bclr({{ - Rs2 &= (64 - 1); - Rd = Rs1 & (~(UINT64_C(1) << Rs2)); + Rs2 &= rvSelect(32 - 1, 64 - 1); + Rd = rvSext(Rs1 & (~(UINT64_C(1) << Rs2))); }}); 0x30: rol({{ - int shamt = Rs2 & (64 - 1); - Rd = (Rs1 << shamt) | (Rs1 >> ((64 - shamt) & (64 - 1))); + uint64_t xlen = rvSelect(32, 64); + int shamt = Rs2 & (xlen - 1); + Rd = rvSext((Rs1 << shamt) + | (rvZext(Rs1) >> ((xlen - shamt) & (xlen - 1)))); }}); 0x34: binv({{ - Rs2 &= (64 - 1); - Rd = Rs1 ^ (UINT64_C(1) << Rs2); + Rs2 &= rvSelect(32 - 1, 64 - 1); + Rd = rvSext(Rs1 ^ (UINT64_C(1) << Rs2)); }}); } 0x2: decode FUNCT7 { @@ -1082,15 +1113,17 @@ decode QUADRANT default Unknown::unknown() { } 0x5: clmulr({{ uint64_t result = 0; - for (int i = 0; i < 64; i++) { + uint64_t xlen = rvSelect(32, 64); + uint64_t zextRs1 = rvZext(Rs1); + for (int i = 0; i < xlen; i++) { if ((Rs2 >> i) & 1) { - result ^= Rs1 >> (64-i-1); + result ^= zextRs1 >> (xlen-i-1); } } - Rd = result; + Rd = rvSext(result); }}); 0x10: sh1add({{ - Rd = (Rs1 << 1) + Rs2; + Rd = rvSext((Rs1 << 1) + Rs2); }}); 0x14: xperm4({{ Rd_sd = _rvk_emu_xperm4_64(Rs1_sd, Rs2_sd); @@ -1123,11 +1156,14 @@ decode QUADRANT default Unknown::unknown() { } 0x5: clmulh({{ uint64_t result = 0; - for (int i = 1; i < 64; i++) { + uint64_t xlen = rvSelect(32, 64); + uint64_t zextRs1 = rvZext(Rs1); + for (int i = 1; i < xlen; i++) { if ((Rs2 >> i) & 1) { - result ^= (Rs1 >> (64-i)); + result ^= zextRs1 >> (xlen-i); } } + // The MSB can never be 1, no need to sign extend. 
Rd = result; }}); } @@ -1159,17 +1195,22 @@ decode QUADRANT default Unknown::unknown() { } }}, IntDivOp); } + 0x4: decode RVTYPE { + 0x0: rv32_zext_h({{ + Rd = Rs1_uh; + }}); + } 0x5: min({{ - Rd = (((int64_t) Rs1) < ((int64_t) Rs2)) ? Rs1 : Rs2; + Rd_sd = std::min(rvSext(Rs1_sd), rvSext(Rs2_sd)); }}); 0x10: sh2add({{ - Rd = (Rs1 << 2) + Rs2; + Rd = rvSext((Rs1 << 2) + Rs2); }}); 0x14: xperm8({{ Rd_sd = _rvk_emu_xperm8_64(Rs1_sd, Rs2_sd); }}); 0x20: xnor({{ - Rd = ~(Rs1 ^ Rs2); + Rd = rvSext(~(Rs1 ^ Rs2)); }}); } 0x5: decode FUNCT7 { @@ -1197,15 +1238,18 @@ decode QUADRANT default Unknown::unknown() { Rd = rvSext(Rs1_sd) >> rvSelect(Rs2<4:0>, Rs2<5:0>); }}); 0x5: minu({{ - Rd = Rs1 < Rs2 ? Rs1 : Rs2; + Rd = rvSext(std::min(rvZext(Rs1), rvZext(Rs2))); }}); 0x24: bext({{ - Rs2 &= (64 - 1); + Rs2 &= (rvSelect(32, 64) - 1); + // It doesn't need to sign ext because MSB is always 0 Rd = (Rs1 >> Rs2) & 0x1; }}); 0x30: ror({{ - int shamt = Rs2 & (64 - 1); - Rd = (Rs1 >> shamt) | (Rs1 << ((64 - shamt) & (64 - 1))); + uint64_t xlen = rvSelect(32, 64); + int shamt = Rs2 & (xlen - 1); + Rd = rvSext((rvZext(Rs1) >> shamt) + | (Rs1 << ((xlen - shamt) & (xlen - 1)))); }}); } 0x6: decode FUNCT7 { @@ -1237,13 +1281,13 @@ decode QUADRANT default Unknown::unknown() { }}, IntDivOp); } 0x5: max({{ - Rd = (((int64_t) Rs1) > ((int64_t) Rs2)) ? Rs1 : Rs2; + Rd_sd = std::max(rvSext(Rs1_sd), rvSext(Rs2_sd)); }}); 0x10: sh3add({{ - Rd = (Rs1 << 3) + Rs2; + Rd = rvSext((Rs1 << 3) + Rs2); }}); 0x20: orn({{ - Rd = Rs1 | (~Rs2); + Rd = rvSext(Rs1 | (~Rs2)); }}); } 0x7: decode FUNCT7 { @@ -1267,10 +1311,10 @@ decode QUADRANT default Unknown::unknown() { }}, IntDivOp); } 0x5: maxu({{ - Rd = Rs1 > Rs2 ? 
Rs1 : Rs2; + Rd = rvSext(std::max(rvZext(Rs1), rvZext(Rs2))); }}); 0x20: andn({{ - Rd = Rs1 & (~Rs2); + Rd = rvSext(Rs1 & (~Rs2)); }}); } } @@ -1280,46 +1324,38 @@ decode QUADRANT default Unknown::unknown() { Rd = (sext<20>(imm) << 12); }}); - 0x0e: decode FUNCT3 { - format ROp { - 0x0: decode FUNCT7 { - 0x0: decode RVTYPE { - 0x1: addw({{ + 0x0e: decode RVTYPE { + 0x1: decode FUNCT3 { + format ROp { + 0x0: decode FUNCT7 { + 0x0: addw({{ Rd_sd = Rs1_sw + Rs2_sw; }}); - } - 0x1: decode RVTYPE { 0x1: mulw({{ Rd_sd = (int32_t)(Rs1_sw*Rs2_sw); }}, IntMultOp); - } - 0x4: add_uw({{ - Rd = Rs1_uw + Rs2; - }}); - 0x20: decode RVTYPE { - 0x1: subw({{ + 0x4: add_uw({{ + Rd = Rs1_uw + Rs2; + }}); + 0x20: subw({{ Rd_sd = Rs1_sw - Rs2_sw; }}); } - } - 0x1: decode FUNCT7 { - 0x0: decode RVTYPE { - 0x1: sllw({{ + 0x1: decode FUNCT7 { + 0x0: sllw({{ Rd_sd = Rs1_sw << Rs2<4:0>; }}); + 0x30: rolw({{ + int shamt = Rs2 & (32 - 1); + Rd = (int32_t) ((Rs1_uw << shamt) | (Rs1_uw >> ((32 - shamt) & (32 - 1)))); + }}); } - 0x30: rolw({{ - int shamt = Rs2 & (32 - 1); - Rd = (int32_t) ((Rs1_uw << shamt) | (Rs1_uw >> ((32 - shamt) & (32 - 1)))); - }}); - } - 0x2: decode FUNCT7 { - 0x10: sh1add_uw({{ - Rd = (((uint64_t)Rs1_uw) << 1) + Rs2; - }}); - } - 0x4: decode FUNCT7 { - 0x1: decode RVTYPE { + 0x2: decode FUNCT7 { + 0x10: sh1add_uw({{ + Rd = (((uint64_t)Rs1_uw) << 1) + Rs2; + }}); + } + 0x4: decode FUNCT7 { 0x1: divw({{ constexpr int32_t kRsMin = \ std::numeric_limits::min(); @@ -1331,21 +1367,17 @@ decode QUADRANT default Unknown::unknown() { Rd_sd = Rs1_sw/Rs2_sw; } }}, IntDivOp); - } - 0x4: zext_h({{ - Rd = Rs1_uh; - }}); - 0x10: sh2add_uw({{ - Rd = (((uint64_t)Rs1_uw) << 2) + Rs2; - }}); - } - 0x5: decode FUNCT7 { - 0x0: decode RVTYPE { - 0x1: srlw({{ - Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>); + 0x4: zext_h({{ + Rd = Rs1_uh; + }}); + 0x10: sh2add_uw({{ + Rd = (((uint64_t)Rs1_uw) << 2) + Rs2; }}); } - 0x1: decode RVTYPE { + 0x5: decode FUNCT7 { + 0x0: srlw({{ + Rd_sd = (int32_t)(Rs1_uw >> 
Rs2<4:0>); + }}); 0x1: divuw({{ if (Rs2_uw == 0) { Rd_sd = std::numeric_limits::max(); @@ -1353,19 +1385,15 @@ decode QUADRANT default Unknown::unknown() { Rd_sd = (int32_t)(Rs1_uw/Rs2_uw); } }}, IntDivOp); - } - 0x20: decode RVTYPE { - 0x1: sraw({{ + 0x20: sraw({{ Rd_sd = Rs1_sw >> Rs2<4:0>; }}); + 0x30: rorw({{ + int shamt = Rs2 & (32 - 1); + Rd = (int32_t) ((Rs1_uw >> shamt) | (Rs1_uw << ((32 - shamt) & (32 - 1)))); + }}); } - 0x30: rorw({{ - int shamt = Rs2 & (32 - 1); - Rd = (int32_t) ((Rs1_uw >> shamt) | (Rs1_uw << ((32 - shamt) & (32 - 1)))); - }}); - } - 0x6: decode FUNCT7 { - 0x1: decode RVTYPE { + 0x6: decode FUNCT7 { 0x1: remw({{ constexpr int32_t kRsMin = \ std::numeric_limits::min(); @@ -1377,13 +1405,11 @@ decode QUADRANT default Unknown::unknown() { Rd_sd = Rs1_sw%Rs2_sw; } }}, IntDivOp); + 0x10: sh3add_uw({{ + Rd = (((uint64_t)Rs1_uw) << 3) + Rs2; + }}); } - 0x10: sh3add_uw({{ - Rd = (((uint64_t)Rs1_uw) << 3) + Rs2; - }}); - } - 0x7: decode RVTYPE { - 0x1: remuw({{ + 0x7: remuw({{ if (Rs2_uw == 0) { Rd_sd = (int32_t)Rs1_uw; } else { From f96513fd042c3a1843eb4a3131d08b0fe0aa947f Mon Sep 17 00:00:00 2001 From: Emin Gadzhiev Date: Fri, 2 Dec 2022 02:18:44 +0300 Subject: [PATCH 057/492] sim,sim-se: Fix restoring of VMAs of memory-mapped files This patch fixes a problem that occurs when restoring from a checkpoint where Mapped File Buffers are not restored. This causes errors and unexpected behavior during further execution. Since the checkpoint already has the size of the area (address range) and the file name, only the offset is missing to restore the Mapped File Buffer. Having the offset value, it's possible to open those files for which an offset is specified and create a VMA with a Mapped File Buffer. 
Change-Id: Ib9dfa174cda6348b966b892184c36daeaba80e81 Signed-off-by: Emin Gadzhiev Issue-On: https://gem5.atlassian.net/browse/GEM5-1302 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66311 Reviewed-by: Jason Lowe-Power Tested-by: kokoro Maintainer: Jason Lowe-Power --- src/sim/mem_state.hh | 18 +++++++++++++++++- src/sim/vma.cc | 2 +- src/sim/vma.hh | 6 ++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/sim/mem_state.hh b/src/sim/mem_state.hh index 05f2239f96..b2b50d0760 100644 --- a/src/sim/mem_state.hh +++ b/src/sim/mem_state.hh @@ -29,6 +29,9 @@ #ifndef SRC_SIM_MEM_STATE_HH #define SRC_SIM_MEM_STATE_HH +#include +#include + #include #include #include @@ -199,6 +202,9 @@ class MemState : public Serializable for (auto vma : _vmaList) { ScopedCheckpointSection sec(cp, csprintf("Vma%d", count++)); paramOut(cp, "name", vma.getName()); + if (vma.hasHostBuf()) { + paramOut(cp, "fileOffset", vma.getFileMappingOffset()); + } paramOut(cp, "addrRangeStart", vma.start()); paramOut(cp, "addrRangeEnd", vma.end()); } @@ -223,10 +229,20 @@ class MemState : public Serializable std::string name; Addr start; Addr end; + off_t offset = 0; + int host_fd = -1; paramIn(cp, "name", name); + if (optParamIn(cp, "fileOffset", offset, false)) { + host_fd = open(name.c_str(), O_RDONLY); + fatal_if(host_fd < 0, + "Failed to open %s file " + "while unserializing file-backed VMA\n", name); + } paramIn(cp, "addrRangeStart", start); paramIn(cp, "addrRangeEnd", end); - _vmaList.emplace_back(AddrRange(start, end), _pageBytes, name); + _vmaList.emplace_back(AddrRange(start, end), _pageBytes, name, + host_fd, offset); + close(host_fd); } } diff --git a/src/sim/vma.cc b/src/sim/vma.cc index 7e5ed1c491..ff5a4fe3cc 100644 --- a/src/sim/vma.cc +++ b/src/sim/vma.cc @@ -120,7 +120,7 @@ VMA::sanityCheck() VMA::MappedFileBuffer::MappedFileBuffer(int fd, size_t length, off_t offset) - : _buffer(nullptr), _length(length) + : _buffer(nullptr), _length(length), 
_offset(offset) { panic_if(_length == 0, "Tried to mmap file of length zero"); diff --git a/src/sim/vma.hh b/src/sim/vma.hh index b238a2e416..8f2a77f36c 100644 --- a/src/sim/vma.hh +++ b/src/sim/vma.hh @@ -105,6 +105,10 @@ class VMA void sliceRegionLeft(Addr slice_addr); const std::string& getName() { return _vmaName; } + off_t getFileMappingOffset() const + { + return hasHostBuf() ? _origHostBuf->getOffset() : 0; + } /** * Defer AddrRange related calls to the AddrRange. @@ -191,10 +195,12 @@ class VMA void *getBuffer() const { return _buffer; } uint64_t getLength() const { return _length; } + off_t getOffset() const { return _offset; } private: void *_buffer; // Host buffer ptr size_t _length; // Length of host ptr + off_t _offset; // Offset in file at which mapping starts }; }; From 8b1688da340e5573a0c6f00c835f08ac5f73f963 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sat, 10 Dec 2022 02:29:43 -0800 Subject: [PATCH 058/492] dev: Introduce a reset() method on RegisterBank and Register classes. This will make it much easier to implement reset behaviors on devices which have RegisterBanks in them. Change-Id: I73fe9874fcb69feed33611a320dcca85c0de2d0e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66671 Tested-by: kokoro Maintainer: Gabe Black Reviewed-by: Yu-hsin Wang Reviewed-by: Jui-min Lee --- src/dev/reg_bank.hh | 41 ++++++++++++++++++++++++++++++++++++-- src/dev/serial/uart8250.hh | 7 +++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/dev/reg_bank.hh b/src/dev/reg_bank.hh index 31c0ce5b66..66d668b338 100644 --- a/src/dev/reg_bank.hh +++ b/src/dev/reg_bank.hh @@ -117,6 +117,11 @@ * RegisterBankLE and RegisterBankBE aliases to make it a little easier to * refer to one or the other version. * + * A RegisterBank also has a reset() method which will (by default) call the + * reset() method on each register within it. 
This method is virtual, and so + * can be overridden if something additional or different needs to be done to + * reset the hardware model. + * * * == Register interface == * @@ -145,6 +150,12 @@ * it still has to implement these methods, but they don't have to actually do * anything. * + * Each register also has a "reset" method, which will reset the register as + * if its containing device is being reset. By default, this will just restore + * the initial value of the register, but can be overridden to implement + * additional behavior like resetting other aspects of the device which are + * controlled by the value of the register. + * * * == Basic Register types == * @@ -360,6 +371,9 @@ class RegisterBank : public RegisterBankBase // Methods for implementing serialization for checkpoints. virtual void serialize(std::ostream &os) const = 0; virtual bool unserialize(const std::string &s) = 0; + + // Reset the register. + virtual void reset() = 0; }; // Filler registers which return a fixed pattern. @@ -388,6 +402,9 @@ class RegisterBank : public RegisterBankBase void serialize(std::ostream &os) const override {} bool unserialize(const std::string &s) override { return true; } + + // Resetting a read only register doesn't need to do anything. + void reset() override {} }; // Register which reads as all zeroes. @@ -453,6 +470,10 @@ class RegisterBank : public RegisterBankBase void serialize(std::ostream &os) const override {} bool unserialize(const std::string &s) override { return true; } + // Assume since the buffer is managed externally, it will be reset + // externally. 
+ void reset() override {} + protected: /** * This method exists so that derived classes that need to initialize @@ -516,6 +537,8 @@ class RegisterBank : public RegisterBankBase return true; } + + void reset() override { buffer = std::array{}; } }; template @@ -534,6 +557,7 @@ class RegisterBank : public RegisterBankBase private: Data _data = {}; + Data _resetData = {}; Data _writeMask = mask(sizeof(Data) * 8); ReadFunc _reader = defaultReader; @@ -602,11 +626,13 @@ class RegisterBank : public RegisterBankBase // Constructor and move constructor with an initial data value. constexpr Register(const std::string &new_name, const Data &new_data) : - RegisterBase(new_name, sizeof(Data)), _data(new_data) + RegisterBase(new_name, sizeof(Data)), _data(new_data), + _resetData(new_data) {} constexpr Register(const std::string &new_name, const Data &&new_data) : - RegisterBase(new_name, sizeof(Data)), _data(new_data) + RegisterBase(new_name, sizeof(Data)), _data(new_data), + _resetData(new_data) {} // Set which bits of the register are writeable. @@ -789,6 +815,9 @@ class RegisterBank : public RegisterBankBase { return ParseParam::parse(s, get()); } + + // Reset our data to its initial value. + void reset() override { get() = _resetData; } }; private: @@ -984,6 +1013,14 @@ class RegisterBank : public RegisterBankBase } } } + + // By default, reset all the registers in the bank. 
+ virtual void + reset() + { + for (auto &[offset, reg]: _offsetMap) + reg.get().reset(); + } }; using RegisterBankLE = RegisterBank; diff --git a/src/dev/serial/uart8250.hh b/src/dev/serial/uart8250.hh index c55d889c82..5774f78aab 100644 --- a/src/dev/serial/uart8250.hh +++ b/src/dev/serial/uart8250.hh @@ -113,6 +113,13 @@ class Uart8250 : public Uart void serialize(std::ostream &os) const override {} bool unserialize(const std::string &s) override { return true; } + + void + reset() override + { + _reg1.reset(); + _reg2.reset(); + } }; class BankedRegister : public PairedRegister From 7a21ecf15cddb2dcd545574bfeb530d2d7dcee13 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Dec 2022 23:51:14 -0800 Subject: [PATCH 059/492] dev: Implement a "Signal" port which has a templated State type. This port type transmits a value of the templated State type. When the value changes, the sink port will call the registered callback with the new value. Change-Id: I72eaf74658a2c63bece95e48c1a72694874eaad8 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66672 Maintainer: Gabe Black Reviewed-by: Jui-min Lee Tested-by: kokoro Reviewed-by: Yu-hsin Wang --- src/sim/signal.hh | 131 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 src/sim/signal.hh diff --git a/src/sim/signal.hh b/src/sim/signal.hh new file mode 100644 index 0000000000..3cb3f62c0d --- /dev/null +++ b/src/sim/signal.hh @@ -0,0 +1,131 @@ +/* + * Copyright 2022 Google, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __SIM_SIGNAL_HH__ +#define __SIM_SIGNAL_HH__ + +#include + +#include "base/logging.hh" +#include "sim/port.hh" + +namespace gem5 +{ + +template +class SignalSourcePort; + +template +class SignalSinkPort : public Port +{ + public: + using OnChangeFunc = std::function; + + private: + friend SignalSourcePort; + + SignalSourcePort *_source = nullptr; + + State _state = {}; + OnChangeFunc _onChange; + + protected: + void + set(const State &new_state) + { + if (new_state == _state) + return; + + _state = new_state; + if (_onChange) + _onChange(_state); + } + + public: + SignalSinkPort(const std::string &_name, PortID _id=InvalidPortID) : + Port(_name, _id) + {} + + const State &state() const { return _state; } + void onChange(OnChangeFunc func) { _onChange = std::move(func); } + + void + bind(Port &peer) override + { + _source = dynamic_cast *>(&peer); + fatal_if(!_source, "Attempt to bind signal pin %s to " + "incompatible pin %s", name(), peer.name()); + Port::bind(peer); + } + void + unbind() override + { + _source = nullptr; + Port::unbind(); + } +}; + +template +class SignalSourcePort : public Port +{ + private: + SignalSinkPort *sink = nullptr; + State _state = {}; + + public: + SignalSourcePort(const std::string &_name, PortID _id=InvalidPortID) : + Port(_name, _id) + {} + + void + set(const State &new_state) + { + _state = new_state; + sink->set(new_state); + } + + const State &state() const { return _state; } + + void + bind(Port &peer) override + { + sink = dynamic_cast *>(&peer); + fatal_if(!sink, "Attempt to bind signal pin %s to " + "incompatible pin %s", name(), peer.name()); + Port::bind(peer); + } + void + unbind() override + { + sink = nullptr; + Port::unbind(); + } +}; + +} // namespace gem5 + +#endif //__SIM_SIGNAL_HH__ From 89d5bfca7ce79bbce1f01471c578978d029b8f22 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 12 Dec 2022 23:59:21 -0800 Subject: [PATCH 060/492] fastmodel,dev: Rework the Int*Pin classes with Signal*Port. 
These are largely compatibility wrappers around the Signal*Port classes. The python versions of these types enforce more specific compatibility, but on the c++ side the Signal*Port classes can be used directly instead. Change-Id: I1325074d0ed1c8fc6dfece5ac1ee33872cc4f5e3 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66673 Maintainer: Gabe Black Reviewed-by: Yu-hsin Wang Tested-by: kokoro --- .../arm/fastmodel/common/signal_sender.hh | 10 +- src/dev/SConscript | 1 - src/dev/intpin.cc | 67 ----------- src/dev/intpin.hh | 109 +++++++----------- 4 files changed, 48 insertions(+), 139 deletions(-) delete mode 100644 src/dev/intpin.cc diff --git a/src/arch/arm/fastmodel/common/signal_sender.hh b/src/arch/arm/fastmodel/common/signal_sender.hh index f4772cfad6..812a1b95c8 100644 --- a/src/arch/arm/fastmodel/common/signal_sender.hh +++ b/src/arch/arm/fastmodel/common/signal_sender.hh @@ -50,11 +50,11 @@ class SignalSender : public IntSinkPinBase SignalSender(const std::string &_name, PortID _id) : IntSinkPinBase(_name, _id, 0), signal_out((_name + ".sig").c_str()) - {} - - private: - void raiseOnDevice() override { signal_out.set_state(id, true); } - void lowerOnDevice() override { signal_out.set_state(id, false); } + { + onChange([this](const bool &new_val) { + signal_out.set_state(id, new_val); + }); + } }; } // namespace fastmodel diff --git a/src/dev/SConscript b/src/dev/SConscript index 6a6ce40dcc..d991ed53a9 100644 --- a/src/dev/SConscript +++ b/src/dev/SConscript @@ -36,7 +36,6 @@ Source('dma_device.cc') Source('dma_virt_device.cc') SimObject('IntPin.py', sim_objects=[]) -Source('intpin.cc') SimObject('ResetPort.py', sim_objects=[]) Source('reset_port.cc') diff --git a/src/dev/intpin.cc b/src/dev/intpin.cc deleted file mode 100644 index c378337d4a..0000000000 --- a/src/dev/intpin.cc +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2019 Google, Inc. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "dev/intpin.hh" - -#include "base/logging.hh" - -namespace gem5 -{ - -void -IntSinkPinBase::bind(Port &peer) -{ - source = dynamic_cast(&peer); - fatal_if(!source, "Attempt to bind interrupt sink pin %s to " - "incompatible port %s.", name(), peer.name()); - Port::bind(peer); -} - -void -IntSinkPinBase::unbind() -{ - source = nullptr; - Port::unbind(); -} - -void -IntSourcePinBase::bind(Port &peer) -{ - sink = dynamic_cast(&peer); - fatal_if(!sink, "Attempt to bind interrupt source pin %s to " - "incompatible port %s.", name(), peer.name()); - Port::bind(peer); -} - -void -IntSourcePinBase::unbind() -{ - sink = nullptr; - Port::unbind(); -} - -} // namespace gem5 diff --git a/src/dev/intpin.hh b/src/dev/intpin.hh index 2704887f35..578dd90d9c 100644 --- a/src/dev/intpin.hh +++ b/src/dev/intpin.hh @@ -28,94 +28,71 @@ #ifndef __DEV_INTPIN_HH__ #define __DEV_INTPIN_HH__ -#include "sim/port.hh" +#include + +#include "sim/signal.hh" namespace gem5 { -class IntSourcePinBase; - -class IntSinkPinBase : public Port +class IntSinkPinBase : public SignalSinkPort { - protected: - friend IntSourcePinBase; + private: + const int _number = 0; - IntSourcePinBase *source = nullptr; + public: - int _number = 0; - bool _state = false; + template + IntSinkPinBase(const std::string &_name, PortID _id, Device *dev, + int num) : + SignalSinkPort(_name, _id), _number(num) + { + onChange([dev, num](const bool &new_val) { + if (new_val) + dev->raiseInterruptPin(num); + else + dev->lowerInterruptPin(num); + }); + } + + template + IntSinkPinBase(const std::string &_name, PortID _id, Device *dev) : + IntSinkPinBase(_name, _id, dev, _id) + {} IntSinkPinBase(const std::string &_name, PortID _id, int num) : - Port(_name, _id), _number(num) + SignalSinkPort(_name, _id), _number(num) {} - virtual void raiseOnDevice() = 0; - virtual void lowerOnDevice() = 0; + IntSinkPinBase(const std::string &_name, PortID _id) : + IntSinkPinBase(_name, _id, _id) + {} - void - raise() - { - _state = 
true; - raiseOnDevice(); - } - - void - lower() - { - _state = false; - lowerOnDevice(); - } - - public: int number() { return _number; } - bool state() { return _state; } - - void bind(Port &peer) override; - void unbind() override; }; -template -class IntSinkPin : public IntSinkPinBase +template +using IntSinkPin = IntSinkPinBase; + +class IntSourcePinBase : public SignalSourcePort { - private: - Device *device = nullptr; - - void raiseOnDevice() override { device->raiseInterruptPin(number()); } - void lowerOnDevice() override { device->lowerInterruptPin(number()); } - public: - IntSinkPin(const std::string &_name, PortID _id, Device *dev, int num) : - IntSinkPinBase(_name, _id, num), device(dev) {} - IntSinkPin(const std::string &_name, PortID _id, Device *dev) : - IntSinkPin(_name, _id, dev, _id) {} -}; - -class IntSourcePinBase : public Port -{ - private: - IntSinkPinBase *sink = nullptr; - - public: - IntSourcePinBase(const std::string &_name, PortID _id): - Port(_name, _id) + template + IntSourcePinBase(const std::string &_name, PortID _id, Device *owner) : + SignalSourcePort(_name, _id) {} - void raise() { sink->raise(); } - void lower() { sink->lower(); } - - void bind(Port &peer) override; - void unbind() override; -}; - -template -class IntSourcePin : public IntSourcePinBase -{ - public: - IntSourcePin(const std::string &_name, PortID _id, Device *owner) : - IntSourcePinBase(_name, _id) + IntSourcePinBase(const std::string &_name, PortID _id) : + SignalSourcePort(_name, _id) {} + + void raise() { set(true); } + void lower() { set(false); } }; +template +using IntSourcePin = IntSourcePinBase; + } // namespace gem5 #endif //__DEV_INTPIN_HH__ From 0aaaa6b4ae61c7a57c1a44d1c615dcafc057e0a8 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 13 Dec 2022 00:32:08 -0800 Subject: [PATCH 061/492] fastmodel: Change the Signal proxies to use Signal*Port. 
Change-Id: Ia1aa32d5ea50ff4cc47d1d72a9c25dabd6c30de9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66674 Maintainer: Gabe Black Tested-by: kokoro Reviewed-by: Yu-hsin Wang --- src/arch/arm/fastmodel/common/signal_receiver.hh | 6 +++--- src/arch/arm/fastmodel/common/signal_sender.hh | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/arch/arm/fastmodel/common/signal_receiver.hh b/src/arch/arm/fastmodel/common/signal_receiver.hh index 990787743b..9ec760e32b 100644 --- a/src/arch/arm/fastmodel/common/signal_receiver.hh +++ b/src/arch/arm/fastmodel/common/signal_receiver.hh @@ -87,7 +87,7 @@ class SignalReceiver : public amba_pv::signal_slave_base class SignalReceiverInt : public SignalReceiver { public: - using IntPin = IntSourcePin; + using IntPin = SignalSourcePort; explicit SignalReceiverInt(const std::string &name) : SignalReceiver(name) @@ -95,7 +95,7 @@ class SignalReceiverInt : public SignalReceiver onChange([this](bool status) { for (auto &signal : signalOut) { if (signal && signal->isConnected()) - status ? 
signal->raise() : signal->lower(); + signal->set(status); } }); } @@ -108,7 +108,7 @@ class SignalReceiverInt : public SignalReceiver } if (!signalOut[idx]) { signalOut[idx] = std::make_unique( - csprintf("%s.signalOut[%d]", get_name(), idx), idx, this); + csprintf("%s.signalOut[%d]", get_name(), idx), idx); } return *signalOut[idx]; } diff --git a/src/arch/arm/fastmodel/common/signal_sender.hh b/src/arch/arm/fastmodel/common/signal_sender.hh index 812a1b95c8..c596ed108e 100644 --- a/src/arch/arm/fastmodel/common/signal_sender.hh +++ b/src/arch/arm/fastmodel/common/signal_sender.hh @@ -33,7 +33,7 @@ #include #pragma GCC diagnostic pop -#include "dev/intpin.hh" +#include "sim/signal.hh" namespace gem5 { @@ -42,14 +42,13 @@ GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { -class SignalSender : public IntSinkPinBase +class SignalSender : public SignalSinkPort { public: amba_pv::signal_master_port signal_out; SignalSender(const std::string &_name, PortID _id) : - IntSinkPinBase(_name, _id, 0), - signal_out((_name + ".sig").c_str()) + SignalSinkPort(_name, _id), signal_out((_name + ".sig").c_str()) { onChange([this](const bool &new_val) { signal_out.set_state(id, new_val); From fbd0722de4f4929f2d26f98f92427e33ef4fd775 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 13 Dec 2022 02:17:22 -0800 Subject: [PATCH 062/492] fastmodel,dev: Replace the reset port with a Signal*Port. The ResetRequestPort and ResetResponsePort have a few problems: 1. A reset signal should happen during the time a reset is asserted, or in other words the device should stay in reset and not do anything while reset is asserted. It should not immediately restart execution while the reset is still held. 2. These names are misleading, since there is no response. These names are inherited from other port types where there is an actual response. 
There is a new generic SignalSourcePort and SignalSinkPort set of port classes which are templated on the type of signal they propagate, and which can be used in place of reset ports in c++. These ports can still have a specialized role which will ensure that only reset ports are connected to each other for a form of type checking, although the underlying c++ instances are more interoperable than that. Change-Id: Id98bef901ab61ac5b200dbbe49439bb2d2e6c57f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66675 Maintainer: Gabe Black Reviewed-by: Yu-hsin Wang Tested-by: kokoro --- src/arch/arm/fastmodel/CortexA76/evs.cc | 29 +++----- src/arch/arm/fastmodel/CortexA76/evs.hh | 6 +- src/arch/arm/fastmodel/CortexR52/evs.cc | 12 +++- src/arch/arm/fastmodel/CortexR52/evs.hh | 20 +----- .../arm/fastmodel/reset_controller/example.cc | 16 ++--- .../arm/fastmodel/reset_controller/example.hh | 7 +- src/dev/SConscript | 2 - src/dev/reset_port.cc | 57 --------------- src/dev/reset_port.hh | 72 ------------------- 9 files changed, 33 insertions(+), 188 deletions(-) delete mode 100644 src/dev/reset_port.cc delete mode 100644 src/dev/reset_port.hh diff --git a/src/arch/arm/fastmodel/CortexA76/evs.cc b/src/arch/arm/fastmodel/CortexA76/evs.cc index 1c069351ca..c9ce3cc656 100644 --- a/src/arch/arm/fastmodel/CortexA76/evs.cc +++ b/src/arch/arm/fastmodel/CortexA76/evs.cc @@ -70,23 +70,6 @@ ScxEvsCortexA76::setResetAddr(int core, Addr addr, bool secure) this->rvbaraddr[core]->set_state(0, addr); } -template -void -ScxEvsCortexA76::requestReset() -{ - // Reset all cores. - for (auto &poweron_reset : this->poweron_reset) { - poweron_reset->signal_out.set_state(0, true); - poweron_reset->signal_out.set_state(0, false); - } - // Reset DSU. - this->top_reset.signal_out.set_state(0, true); - this->top_reset.signal_out.set_state(0, false); - // Reset debug APB. 
- this->dbg_reset.signal_out.set_state(0, true); - this->dbg_reset.signal_out.set_state(0, false); -} - template ScxEvsCortexA76::ScxEvsCortexA76( const sc_core::sc_module_name &mod_name, const Params &p) : @@ -94,9 +77,19 @@ ScxEvsCortexA76::ScxEvsCortexA76( amba(Base::amba, p.name + ".amba", -1), top_reset(p.name + ".top_reset", 0), dbg_reset(p.name + ".dbg_reset", 0), - model_reset(p.name + ".model_reset", -1, this), + model_reset(p.name + ".model_reset"), params(p) { + model_reset.onChange([this](const bool &new_val) { + // Set reset for all cores. + for (auto &poweron_reset : poweron_reset) + poweron_reset->signal_out.set_state(0, new_val); + // Set reset for DSU. + top_reset.signal_out.set_state(0, new_val); + // Set reset for debug APB. + dbg_reset.signal_out.set_state(0, new_val); + }); + for (int i = 0; i < CoreCount; i++) { redist.emplace_back(new TlmGicTarget(this->redistributor[i], csprintf("%s.redistributor[%d]", name(), i), i)); diff --git a/src/arch/arm/fastmodel/CortexA76/evs.hh b/src/arch/arm/fastmodel/CortexA76/evs.hh index 081e80f701..7c4ef601a7 100644 --- a/src/arch/arm/fastmodel/CortexA76/evs.hh +++ b/src/arch/arm/fastmodel/CortexA76/evs.hh @@ -35,7 +35,6 @@ #include "arch/arm/fastmodel/common/signal_sender.hh" #include "arch/arm/fastmodel/iris/cpu.hh" #include "arch/arm/fastmodel/protocol/exported_clock_rate_control.hh" -#include "dev/reset_port.hh" #include "mem/port_proxy.hh" #include "params/FastModelScxEvsCortexA76x1.hh" #include "params/FastModelScxEvsCortexA76x2.hh" @@ -45,6 +44,7 @@ #include "scx_evs_CortexA76x2.h" #include "scx_evs_CortexA76x3.h" #include "scx_evs_CortexA76x4.h" +#include "sim/signal.hh" #include "systemc/ext/core/sc_event.hh" #include "systemc/ext/core/sc_module.hh" #include "systemc/tlm_port_wrapper.hh" @@ -99,7 +99,7 @@ class ScxEvsCortexA76 : public Types::Base, public Iris::BaseCpuEvs SignalSender dbg_reset; - ResetResponsePort model_reset; + SignalSinkPort model_reset; CortexA76Cluster *gem5CpuCluster; @@ -129,8 
+129,6 @@ class ScxEvsCortexA76 : public Types::Base, public Iris::BaseCpuEvs void setCluster(SimObject *cluster) override; void setResetAddr(int core, Addr addr, bool secure) override; - - void requestReset(); }; struct ScxEvsCortexA76x1Types diff --git a/src/arch/arm/fastmodel/CortexR52/evs.cc b/src/arch/arm/fastmodel/CortexR52/evs.cc index 734323e026..0ad3f18412 100644 --- a/src/arch/arm/fastmodel/CortexR52/evs.cc +++ b/src/arch/arm/fastmodel/CortexR52/evs.cc @@ -101,9 +101,19 @@ ScxEvsCortexR52::ScxEvsCortexR52( ext_slave(Base::ext_slave, p.name + ".ext_slave", -1), top_reset(p.name + ".top_reset", 0), dbg_reset(p.name + ".dbg_reset", 0), - model_reset(p.name + ".model_reset", -1, this), + model_reset(p.name + ".model_reset"), params(p) { + model_reset.onChange([this](const bool &new_val) { + // Set reset for all cores. + for (auto &core_pin : corePins) + core_pin->poweron_reset.signal_out.set_state(0, new_val); + // Set reset for L2 system. + top_reset.signal_out.set_state(0, new_val); + // Set reset for debug APB. 
+ dbg_reset.signal_out.set_state(0, new_val); + }); + for (int i = 0; i < CoreCount; i++) corePins.emplace_back(new CorePins(this, i)); diff --git a/src/arch/arm/fastmodel/CortexR52/evs.hh b/src/arch/arm/fastmodel/CortexR52/evs.hh index 02ef1ae257..9cebec3846 100644 --- a/src/arch/arm/fastmodel/CortexR52/evs.hh +++ b/src/arch/arm/fastmodel/CortexR52/evs.hh @@ -37,7 +37,6 @@ #include "arch/arm/fastmodel/protocol/exported_clock_rate_control.hh" #include "arch/arm/fastmodel/protocol/signal_interrupt.hh" #include "dev/intpin.hh" -#include "dev/reset_port.hh" #include "mem/port_proxy.hh" #include "params/FastModelScxEvsCortexR52x1.hh" #include "params/FastModelScxEvsCortexR52x2.hh" @@ -47,6 +46,7 @@ #include "scx_evs_CortexR52x2.h" #include "scx_evs_CortexR52x3.h" #include "scx_evs_CortexR52x4.h" +#include "sim/signal.hh" #include "systemc/ext/core/sc_event.hh" #include "systemc/ext/core/sc_module.hh" #include "systemc/tlm_port_wrapper.hh" @@ -127,7 +127,7 @@ class ScxEvsCortexR52 : public Types::Base, public Iris::BaseCpuEvs SignalSender dbg_reset; - ResetResponsePort model_reset; + SignalSinkPort model_reset; CortexR52Cluster *gem5CpuCluster; @@ -149,22 +149,6 @@ class ScxEvsCortexR52 : public Types::Base, public Iris::BaseCpuEvs this->signalInterrupt->spi(num, false); } - void - requestReset() - { - // Reset all cores. - for (auto &core_pin : corePins) { - core_pin->poweron_reset.signal_out.set_state(0, true); - core_pin->poweron_reset.signal_out.set_state(0, false); - } - // Reset L2 system. - this->top_reset.signal_out.set_state(0, true); - this->top_reset.signal_out.set_state(0, false); - // Reset debug APB. 
- this->dbg_reset.signal_out.set_state(0, true); - this->dbg_reset.signal_out.set_state(0, false); - } - Port &gem5_getPort(const std::string &if_name, int idx) override; void diff --git a/src/arch/arm/fastmodel/reset_controller/example.cc b/src/arch/arm/fastmodel/reset_controller/example.cc index 33769acb30..04dfa3bf10 100644 --- a/src/arch/arm/fastmodel/reset_controller/example.cc +++ b/src/arch/arm/fastmodel/reset_controller/example.cc @@ -37,8 +37,8 @@ namespace fastmodel { ResetControllerExample::CorePins::CorePins(const std::string &module_name) - : reset(module_name + ".reset", 0, this), - halt(module_name + ".halt", 0, this) + : reset(module_name + ".reset"), + halt(module_name + ".halt") {} ResetControllerExample::Registers::Registers( @@ -65,22 +65,14 @@ ResetControllerExample::Registers::Registers( { panic_if(!pins->reset.isConnected(), "%s is not connected.", pins->reset.name()); - - if (val) - pins->reset.raise(); - else - pins->reset.lower(); + pins->reset.set(val); }); halt.writer( [this] (auto ®, auto val) { panic_if(!pins->halt.isConnected(), "%s is not connected.", pins->halt.name()); - - if (val) - pins->halt.raise(); - else - pins->halt.lower(); + pins->halt.set(val); }); addRegisters({ diff --git a/src/arch/arm/fastmodel/reset_controller/example.hh b/src/arch/arm/fastmodel/reset_controller/example.hh index 2805d6f077..af663236d5 100644 --- a/src/arch/arm/fastmodel/reset_controller/example.hh +++ b/src/arch/arm/fastmodel/reset_controller/example.hh @@ -31,11 +31,11 @@ #include #include "arch/arm/fastmodel/iris/cpu.hh" -#include "dev/intpin.hh" #include "dev/io_device.hh" #include "dev/reg_bank.hh" #include "mem/packet_access.hh" #include "params/FastModelResetControllerExample.hh" +#include "sim/signal.hh" namespace gem5 { @@ -48,9 +48,8 @@ class ResetControllerExample : public BasicPioDevice private: struct CorePins { - using CoreInt = IntSourcePin; - CoreInt reset; - CoreInt halt; + SignalSourcePort reset; + SignalSourcePort halt; explicit 
CorePins(const std::string &); }; diff --git a/src/dev/SConscript b/src/dev/SConscript index d991ed53a9..a7714a22d7 100644 --- a/src/dev/SConscript +++ b/src/dev/SConscript @@ -36,9 +36,7 @@ Source('dma_device.cc') Source('dma_virt_device.cc') SimObject('IntPin.py', sim_objects=[]) - SimObject('ResetPort.py', sim_objects=[]) -Source('reset_port.cc') DebugFlag('IsaFake') DebugFlag('DMA') diff --git a/src/dev/reset_port.cc b/src/dev/reset_port.cc deleted file mode 100644 index 8d32c7d93c..0000000000 --- a/src/dev/reset_port.cc +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2022 Google, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "dev/reset_port.hh" - -#include "base/logging.hh" - -namespace gem5 -{ - -void -ResetRequestPort::bind(Port &p) -{ - peer = dynamic_cast(&p); - fatal_if(peer == nullptr, "Attempt to bind reset request port %s to " - "incompatible port %s.", name(), p.name()); - Port::bind(p); -} - -void -ResetRequestPort::unbind() -{ - peer = nullptr; - Port::unbind(); -} - -void -ResetRequestPort::requestReset() -{ - peer->requestReset(); -} - -} // namespace gem5 diff --git a/src/dev/reset_port.hh b/src/dev/reset_port.hh deleted file mode 100644 index a08db1ca8e..0000000000 --- a/src/dev/reset_port.hh +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2022 Google, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __DEV_RESET_PORT_HH__ -#define __DEV_RESET_PORT_HH__ - -#include "sim/port.hh" - -#include - -namespace gem5 -{ - -class ResetResponsePortBase : public Port -{ - public: - using Port::Port; - virtual void requestReset() = 0; -}; - -template -class ResetResponsePort : public ResetResponsePortBase -{ - public: - ResetResponsePort(const std::string &name, PortID id, Device *dev) : - ResetResponsePortBase(name, id), device(dev) {} - void requestReset() override { device->requestReset(); } - - private: - Device *device = nullptr; -}; - -class ResetRequestPort : public Port -{ - public: - ResetRequestPort(const std::string &_name, PortID _id) - : Port(_name, _id) {} - void bind(Port &p) override; - void unbind() override; - void requestReset(); - - private: - ResetResponsePortBase *peer = nullptr; -}; - -} // namespace gem5 - -#endif // __DEV_RESET_PORT_HH__ From af2cecf59e9cffbbc96bb88b9137da8ef6c74410 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Thu, 15 Dec 2022 11:43:01 -0800 Subject: [PATCH 063/492] gpu-compute: Fix ABI init for DispatchId DispatchId should allocate two SGPRs instead of one. 
Allocating one was causing all subsequent SGPR index values to be off by one, leading to bad addresses for things like flat scratch and private segment. This field is not used very often so it was not impacting most applications. Change-Id: I17744e2d099fbc0447f400211ba7f8a42675ea06 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66711 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- src/gpu-compute/wavefront.cc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index 7e4b36f7e5..8a1adfe802 100644 --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -118,8 +118,10 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems) { int regInitIdx = 0; - // iterate over all the init fields and check which - // bits are enabled + // Iterate over all the init fields and check which + // bits are enabled. Useful information can be found here: + // https://github.com/ROCm-Developer-Tools/ROCm-ComputeABI-Doc/ + // blob/master/AMDGPU-ABI.md for (int en_bit = 0; en_bit < NumScalarInitFields; ++en_bit) { if (task->sgprBitEnabled(en_bit)) { @@ -263,6 +265,12 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems) computeUnit->cu_id, simdId, wfSlotId, wfDynId, physSgprIdx, task->dispatchId()); + + // Dispatch ID in gem5 is an int. Set upper 32-bits to zero. + physSgprIdx + = computeUnit->registerManager->mapSgpr(this, regInitIdx); + computeUnit->srf[simdId]->write(physSgprIdx, 0); + ++regInitIdx; break; case FlatScratchInit: physSgprIdx From 4cae2ae4adf202bd6fa520b6e6912088c8694dd9 Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Fri, 9 Dec 2022 16:17:25 -0800 Subject: [PATCH 064/492] tests: Remove get_runtime_isa() from parsec_disk_run.py This change removes the call to get_runtime_isa(), as it has been deprecated. 
Change-Id: Ie1b0b5fb456fd8ed504a531841fe4ea8e211502c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66612 Tested-by: kokoro Reviewed-by: Jason Lowe-Power Maintainer: Bobby Bruce --- tests/gem5/configs/parsec_disk_run.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/gem5/configs/parsec_disk_run.py b/tests/gem5/configs/parsec_disk_run.py index 4275ffbd3f..fbe1cd3688 100644 --- a/tests/gem5/configs/parsec_disk_run.py +++ b/tests/gem5/configs/parsec_disk_run.py @@ -214,11 +214,6 @@ board.set_kernel_disk_workload( readfile_contents=command, ) -print("Running with ISA: " + get_runtime_isa().name) -print("Running with protocol: " + get_runtime_coherence_protocol().name) -print() - - # Here we define some custom workbegin/workend exit event generators. Here we # want to switch to detailed CPUs at the beginning of the ROI, then continue to # the end of of the ROI. Then we exit the simulation. From 06f18242fedb67280c425e9e7b533326445580b1 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Sat, 17 Dec 2022 03:11:53 -0800 Subject: [PATCH 065/492] tests: Fix compiler-tests.sh build args passing Reverts this fix: https://gem5-review.googlesource.com/c/public/gem5/+/66631 While this did fix the case where no build args were passed, it broke the case where build args were passed. This fix ensures the script works in both cases. Change-Id: I6cc8cc0c2a10c801d4a59e54b070383ac8ee93ae Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66772 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Maintainer: Bobby Bruce Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- tests/compiler-tests.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/compiler-tests.sh b/tests/compiler-tests.sh index f16e8e5fdb..f5d4bb189f 100755 --- a/tests/compiler-tests.sh +++ b/tests/compiler-tests.sh @@ -76,7 +76,13 @@ builds_per_compiler=1 base_url="gcr.io/gem5-test" # Arguments passed into scons on every build target test. 
-build_args="$@" +if [ $# -eq 0 ];then + # If none is sepcified by the user we pass "-j1" (compile on one thread). + # If `build_args` is left as an empty string, this script will fail. + build_args="-j1" +else + build_args="$@" +fi # Testing directory variables mkdir -p "${build_dir}" # Create the build directory if it doesn't exist. @@ -135,7 +141,7 @@ for compiler in ${images[@]}; do docker run --rm -v "${gem5_root}":"/gem5" -u $UID:$GID \ -w /gem5 --memory="${docker_mem_limit}" $repo_name \ /usr/bin/env python3 /usr/bin/scons --ignore-style \ - "${build_out} ${build_args}" + "${build_out}" "${build_args}" }>"${build_stdout}" 2>"${build_stderr}" result=$? From 7fb2fda841be0d61b00569c5a456fcdc0ab75bb1 Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sun, 18 Dec 2022 10:33:10 -0300 Subject: [PATCH 066/492] base: Fix signature of SatCounter::saturate() The variants that use more than 8 bits were broken, since the size of the difference in those cases could be larger than 8 bits, and the return value was only 8-bits long. Change-Id: I8b75be48f924cc33ebf5e5aeff6d4045fac66bcc Signed-off-by: Daniel R. 
Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66791 Maintainer: Matt Sinclair Reviewed-by: Matt Sinclair Tested-by: kokoro --- src/base/sat_counter.hh | 4 ++-- src/base/sat_counter.test.cc | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/base/sat_counter.hh b/src/base/sat_counter.hh index a607c4ca85..ecb8df8b49 100644 --- a/src/base/sat_counter.hh +++ b/src/base/sat_counter.hh @@ -318,9 +318,9 @@ class GenericSatCounter * * @ingroup api_sat_counter */ - uint8_t saturate() + T saturate() { - const uint8_t diff = maxVal - counter; + const T diff = maxVal - counter; counter = maxVal; return diff; } diff --git a/src/base/sat_counter.test.cc b/src/base/sat_counter.test.cc index 07a01c7279..0a6459c23f 100644 --- a/src/base/sat_counter.test.cc +++ b/src/base/sat_counter.test.cc @@ -149,6 +149,20 @@ TEST(SatCounterTest, Saturate) ASSERT_TRUE(counter.isSaturated()); } +TEST(SatCounterTest, Saturate16) +{ + const unsigned bits = 14; + const unsigned max_value = (1 << bits) - 1; + SatCounter16 counter(bits); + counter++; + ASSERT_FALSE(counter.isSaturated()); + + // Make sure the value added is what was missing to saturate + const unsigned diff = counter.saturate(); + ASSERT_EQ(diff, max_value - 1); + ASSERT_TRUE(counter.isSaturated()); +} + /** * Test back and forth against an int. 
*/ From 5447d55e398746df7b038da447048e0ce47c8460 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Wed, 21 Dec 2022 09:00:30 +0000 Subject: [PATCH 067/492] dev: Fix -Wunused-variable in structured binding Change-Id: Ia244767dd9d1dd7b72c320fb78e48f206694f5a2 Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66891 Tested-by: kokoro Reviewed-by: Yu-hsin Wang Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power --- src/dev/reg_bank.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dev/reg_bank.hh b/src/dev/reg_bank.hh index 66d668b338..32d9058a15 100644 --- a/src/dev/reg_bank.hh +++ b/src/dev/reg_bank.hh @@ -1018,8 +1018,8 @@ class RegisterBank : public RegisterBankBase virtual void reset() { - for (auto &[offset, reg]: _offsetMap) - reg.get().reset(); + for (auto &it: _offsetMap) + it.second.get().reset(); } }; From 6797c78942ade19c6863cbace528c84d913775aa Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Fri, 16 Dec 2022 13:27:14 +0800 Subject: [PATCH 068/492] arch-riscv: Refactor compressed instructions 1. C.JAL should use CJOp format to generate code 2. Use sext function to handle MSB for immediate 3. Add IsCall flags to c.jal, c.jalr 4. 
Use JumpConstructor to CJOp format Change-Id: Id01c0d7cc1a3e17776890268879c568fc9996bc5 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66732 Reviewed-by: Yu-hsin Wang Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/arch/riscv/isa/decoder.isa | 136 +++++++++------------- src/arch/riscv/isa/formats/compressed.isa | 30 +++-- 2 files changed, 72 insertions(+), 94 deletions(-) diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 458327e5ec..c0703927b4 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -47,7 +47,7 @@ decode QUADRANT default Unknown::unknown() { CIMM8<7:6> << 4 | CIMM8<5:2> << 6; }}, {{ - if (machInst == 0) + if (imm == 0) return std::make_shared("zero instruction", machInst); Rp2 = rvSext(sp + imm); @@ -147,91 +147,71 @@ decode QUADRANT default Unknown::unknown() { } } 0x1: decode COPCODE { - format CIOp { - 0x0: c_addi({{ - imm = CIMM5; - if (CIMM1 > 0) - imm |= ~((uint64_t)0x1F); - }}, {{ - if ((RC1 == 0) != (imm == 0)) { - if (RC1 == 0) { - return std::make_shared( - "source reg x0", machInst); - } else { // imm == 0 - return std::make_shared( - "immediate = 0", machInst); - } + 0x0: CIOp::c_addi({{ + imm = sext<6>(CIMM5 | (CIMM1 << 5)); + }}, {{ + if ((RC1 == 0) != (imm == 0)) { + if (RC1 == 0) { + return std::make_shared( + "source reg x0", machInst); + } else { // imm == 0 + return std::make_shared( + "immediate = 0", machInst); } - Rc1_sd = rvSext(Rc1_sd + imm); - }}); - 0x1: decode RVTYPE { - 0x0: c_jal({{ - imm = sext<12>((CJUMPIMM3TO1 << 1) | - (CJUMPIMM4TO4 << 4) | - (CJUMPIMM5TO5 << 5) | - (CJUMPIMM6TO6 << 6) | - (CJUMPIMM7TO7 << 7) | - (CJUMPIMM9TO8 << 8) | - (CJUMPIMM10TO10 << 10) | - (CJUMPIMMSIGN << 11)); - }}, {{ - ra_sw = NPC_uw; - NPC_uw = PC_uw + imm; - }}); - 0x1: c_addiw({{ - imm = CIMM5; - if (CIMM1 > 0) - imm |= ~((uint64_t)0x1F); - }}, {{ - if (RC1 == 0) { - return std::make_shared( - "source reg x0", machInst); - } - Rc1_sw = 
(int32_t)(Rc1_sw + imm); - }}); } - 0x2: c_li({{ - imm = CIMM5; - if (CIMM1 > 0) - imm |= ~((uint64_t)0x1F); + Rc1_sd = rvSext(Rc1_sd + imm); + }}); + 0x1: decode RVTYPE { + 0x0: CJOp::c_jal({{ + ra_sw = NPC_uw; + NPC_uw = PC_uw + imm; + }}, IsDirectControl, IsUncondControl, IsCall); + 0x1: CIOp::c_addiw({{ + imm = sext<6>(CIMM5 | (CIMM1 << 5)); }}, {{ if (RC1 == 0) { return std::make_shared( "source reg x0", machInst); } + Rc1_sw = (int32_t)(Rc1_sw + imm); + }}); + } + 0x2: CIOp::c_li({{ + imm = sext<6>(CIMM5 | (CIMM1 << 5)); + }}, {{ + if (RC1 == 0) { + return std::make_shared( + "source reg x0", machInst); + } + Rc1_sd = imm; + }}); + 0x3: decode RC1 { + 0x2: CIOp::c_addi16sp({{ + imm = sext<10>((CIMM5<4:4> << 4) | + (CIMM5<0:0> << 5) | + (CIMM5<3:3> << 6) | + (CIMM5<2:1> << 7) | + (CIMM1 << 9)); + }}, {{ + if (imm == 0) { + return std::make_shared( + "immediate = 0", machInst); + } + sp_sd = rvSext(sp_sd + imm); + }}); + default: CIOp::c_lui({{ + imm = sext<6>(CIMM5 | (CIMM1 << 5)) << 12; + }}, {{ + if (RC1 == 0 || RC1 == 2) { + return std::make_shared( + "source reg x0", machInst); + } + if (imm == 0) { + return std::make_shared( + "immediate = 0", machInst); + } Rc1_sd = imm; }}); - 0x3: decode RC1 { - 0x2: c_addi16sp({{ - imm = CIMM5<4:4> << 4 | - CIMM5<0:0> << 5 | - CIMM5<3:3> << 6 | - CIMM5<2:1> << 7; - if (CIMM1 > 0) - imm |= ~((int64_t)0x1FF); - }}, {{ - if (imm == 0) { - return std::make_shared( - "immediate = 0", machInst); - } - sp_sd = rvSext(sp_sd + imm); - }}); - default: c_lui({{ - imm = CIMM5 << 12; - if (CIMM1 > 0) - imm |= ~((uint64_t)0x1FFFF); - }}, {{ - if (RC1 == 0 || RC1 == 2) { - return std::make_shared( - "source reg x0", machInst); - } - if (imm == 0) { - return std::make_shared( - "immediate = 0", machInst); - } - Rc1_sd = imm; - }}); - } } 0x4: decode CFUNCT2HIGH { format CIOp { @@ -418,7 +398,7 @@ decode QUADRANT default Unknown::unknown() { } ra = rvSext(NPC); NPC = rvZext(Rc1); - }}, IsIndirectControl, IsUncondControl); + }}, 
IsIndirectControl, IsUncondControl, IsCall); default: CompressedROp::c_add({{ Rc1_sd = rvSext(Rc1_sd + Rc2_sd); }}); diff --git a/src/arch/riscv/isa/formats/compressed.isa b/src/arch/riscv/isa/formats/compressed.isa index d09865803e..3d89ec38a6 100644 --- a/src/arch/riscv/isa/formats/compressed.isa +++ b/src/arch/riscv/isa/formats/compressed.isa @@ -61,33 +61,31 @@ def format CIOp(imm_code, code, imm_type='int64_t', *opt_flags) {{ def format CJOp(code, *opt_flags) {{ imm_code = """ - imm = CJUMPIMM3TO1 << 1 | - CJUMPIMM4TO4 << 4 | - CJUMPIMM5TO5 << 5 | - CJUMPIMM6TO6 << 6 | - CJUMPIMM7TO7 << 7 | - CJUMPIMM9TO8 << 8 | - CJUMPIMM10TO10 << 10; - if (CJUMPIMMSIGN) - imm |= ~((int64_t)0x7FF); + imm = sext<12>((CJUMPIMM3TO1 << 1) | + (CJUMPIMM4TO4 << 4) | + (CJUMPIMM5TO5 << 5) | + (CJUMPIMM6TO6 << 6) | + (CJUMPIMM7TO7 << 7) | + (CJUMPIMM9TO8 << 8) | + (CJUMPIMM10TO10 << 10) | + (CJUMPIMMSIGN << 11)); """ iop = InstObjParams(name, Name, 'ImmOp', {'code': code, 'imm_code': imm_code, 'regs': ''}, opt_flags) header_output = BranchDeclare.subst(iop) - decoder_output = ImmConstructor.subst(iop) + decoder_output = JumpConstructor.subst(iop) decode_block = BasicDecode.subst(iop) exec_output = BranchExecute.subst(iop) }}; def format CBOp(code, *opt_flags) {{ imm_code = """ - imm = CIMM5<2:1> << 1 | - CIMM3<1:0> << 3 | - CIMM5<0:0> << 5 | - CIMM5<4:3> << 6; - if (CIMM3<2:2> > 0) - imm |= ~((int64_t)0xFF); + imm = sext<9>((CIMM5<2:1> << 1) | + (CIMM3<1:0> << 3) | + (CIMM5<0:0> << 5) | + (CIMM5<4:3> << 6) | + (CIMM3<2:2> << 8)); """ regs = 'srcRegIdx(0)' iop = InstObjParams(name, Name, 'ImmOp', From 9ce8c9b81c046328ced0c3b9e41789c593b4bf94 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Mon, 19 Dec 2022 14:08:22 +0800 Subject: [PATCH 069/492] arch-riscv: Refactor template JumpConstructor Add COPCODE == 4 condition to ensure the available instruction is either c_jr or c_jalr and the flag IsReturn should set for instruction c_jalr if RC1 == t0 Change-Id: 
I1b39a6c1dc52c8035f16cc64a1b4c494b14879c0 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66811 Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Yu-hsin Wang --- src/arch/riscv/isa/formats/standard.isa | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa index 6be281fa56..bb500f5f49 100644 --- a/src/arch/riscv/isa/formats/standard.isa +++ b/src/arch/riscv/isa/formats/standard.isa @@ -250,9 +250,14 @@ def template JumpConstructor {{ %(constructor)s; %(imm_code)s; if (QUADRANT != 0x3) { - // Handle "c_jr" instruction, set "IsReturn" flag if RC1 is 1 or 5 - if (CFUNCT1 == 0 && (RC1 == 1 || RC1 == 5)) - flags[IsReturn] = true; + if (COPCODE == 4) { + // Handle "c_jr" instruction, set "IsReturn" flag if RC1 is 1 or 5 + if (CFUNCT1 == 0 && (RC1 == 1 || RC1 == 5)) + flags[IsReturn] = true; + // Handle "c_jalr" instruction, set IsReturn if RC1 != ra + if (CFUNCT1 == 1 && RC1 == 5) + flags[IsReturn] = true; + } } else { bool rd_link = (RD == 1 || RD == 5); bool rs1_link = (RS1 == 1 || RS1 == 5); From 9cd61d000a33a91f679d4c47f45b89c4b68592d4 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Fri, 23 Dec 2022 10:05:46 +0800 Subject: [PATCH 070/492] arch-riscv: Correct the IllegalInstFault message of instruction c.addi4spn In Riscv Manual Volume I: Unprivileged ISA section 18.5, c.addi4spn will not work if imm == 0, not machInst == 0. It is changed in the https://gem5-review.git.corp.google.com/c/public/gem5/+/66732, and here is the additional patch to the CL. 
Change-Id: I2a3c9660dc43f1399f68e03c4f59207f869807a0 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66931 Reviewed-by: Yu-hsin Wang Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/arch/riscv/isa/decoder.isa | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index c0703927b4..53d4a4d558 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -48,7 +48,7 @@ decode QUADRANT default Unknown::unknown() { CIMM8<5:2> << 6; }}, {{ if (imm == 0) - return std::make_shared("zero instruction", + return std::make_shared("immediate = 0", machInst); Rp2 = rvSext(sp + imm); }}, uint64_t); From 8d117aad71283601c6ceb7145817df1557c3bdfd Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Mon, 19 Dec 2022 15:56:49 +0000 Subject: [PATCH 071/492] util: cxxConfigInit has been removed by gem5 This was merged in [1] [1]: https://gem5-review.googlesource.com/c/public/gem5/+/49455 Change-Id: Iba558dd01d5c8fbc05e4d3a106a3e3ff6b696333 Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66851 Reviewed-by: Daniel Carvalho Maintainer: Daniel Carvalho Tested-by: kokoro Reviewed-by: Matthias Jung --- util/tlm/src/sim_control.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/util/tlm/src/sim_control.cc b/util/tlm/src/sim_control.cc index a8a3da4a3f..834cfe064e 100644 --- a/util/tlm/src/sim_control.cc +++ b/util/tlm/src/sim_control.cc @@ -72,8 +72,6 @@ Gem5SimControl::Gem5SimControl(sc_core::sc_module_name name, } instance = this; - gem5::cxxConfigInit(); - // register the systemc slave and master port handler gem5::ExternalSlave::registerHandler("tlm_slave", new SCSlavePortHandler(*this)); From 25b4defa6aff3873e283c54615cda214786b5db7 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Mon, 19 Dec 2022 16:00:34 +0000 Subject: [PATCH 072/492] util: Fix missing include of 
sim/core.hh in util-tlm Change-Id: I6dbf71dac903a660369bf8b33ae0c88d28d07457 Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66852 Reviewed-by: Matthias Jung Tested-by: kokoro Reviewed-by: Daniel Carvalho Maintainer: Daniel Carvalho --- util/tlm/src/sc_master_port.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/util/tlm/src/sc_master_port.cc b/util/tlm/src/sc_master_port.cc index 2e1082818c..c0bb6d5f5f 100644 --- a/util/tlm/src/sc_master_port.cc +++ b/util/tlm/src/sc_master_port.cc @@ -36,6 +36,7 @@ #include "params/ExternalMaster.hh" #include "sc_ext.hh" #include "sc_master_port.hh" +#include "sim/core.hh" #include "sim/system.hh" namespace Gem5SystemC From 55fb8bf40e19c63ebbc4fa8c0a04bce838514c7a Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Mon, 19 Dec 2022 16:06:13 +0000 Subject: [PATCH 073/492] util: Update util-tlm to require C++17 It's the version we currently use to compile gem5 Change-Id: I5d2d26e5ba32191d65a4a5ae58d29a16970d062d Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66853 Maintainer: Daniel Carvalho Reviewed-by: Daniel Carvalho Tested-by: kokoro --- util/tlm/SConstruct | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/util/tlm/SConstruct b/util/tlm/SConstruct index f1e057db70..1a9a79f402 100644 --- a/util/tlm/SConstruct +++ b/util/tlm/SConstruct @@ -51,11 +51,12 @@ shlibsuffix = env['SHLIBSUFFIX'] env.Append(CPPPATH=[gem5_root + '/build/' + gem5_arch, gem5_root + '/util/systemc/gem5_within_systemc', gem5_root + '/ext/systemc/src', + gem5_root + '/ext', '#src', '#examples/common', ]) -env.Append(CXXFLAGS=['-std=c++14', +env.Append(CXXFLAGS=['-std=c++17', '-DSC_INCLUDE_DYNAMIC_PROCESSES', '-DTRACING_ON', ]) From fcde59b24525056bea598278ee8d496182ede7fd Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Mon, 19 Dec 2022 17:09:28 +0000 Subject: [PATCH 074/492] util: ext/systemc is importing env Environment 
instead of main This got changed by [1] With this patch we export env instead of main. There is no risk of ext/systemc polluting the environment as its SConscript is cloning env to the systemc variable anyway, so this double copy was redundant [1]: https://gem5-review.googlesource.com/c/public/gem5/+/56750 Change-Id: Ib6648e9b38416cac0bc7f06d90a337f32bdca6ca Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66854 Maintainer: Daniel Carvalho Tested-by: kokoro Reviewed-by: Daniel Carvalho --- util/tlm/SConstruct | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/util/tlm/SConstruct b/util/tlm/SConstruct index 1a9a79f402..7fe09d446d 100644 --- a/util/tlm/SConstruct +++ b/util/tlm/SConstruct @@ -74,15 +74,14 @@ deps += SConscript('examples/common/SConscript', # the SystemC SConscript makes certain assumptions, we need to fulfill these # assumptions before calling the SConscript. -main = env sys.path.append(gem5_root + '/src/python') AddOption('--no-colors', dest='use_colors', action='store_false', help="Don't add color to abbreviated scons output") -main.SConsignFile('build/systemc/sconsign') +env.SConsignFile('build/systemc/sconsign') SConscript(gem5_root + '/ext/systemc/SConscript', variant_dir='build/systemc', - exports='main') + exports='env') # By adding libraries as dependencies instead of using LIBS, we avoid that # the user needs to set the LD_LIBRARY_PATH From 083566d0c82d2610b246b0b5ad903766e336ac31 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Fri, 9 Dec 2022 10:31:50 +0800 Subject: [PATCH 075/492] arch-riscv: add RV32 ADFIMU_Zfh instruction tests 1. Add rv32 binary files into asmtests 2. 
Support Riscv CPU with 32 bits register to simple_binary_run.py Change-Id: I5cc4c2eeb7654a4acc2d167eb76d8b6522e65dd9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65533 Reviewed-by: Yu-hsin Wang Reviewed-by: Bobby Bruce Tested-by: kokoro Maintainer: Bobby Bruce --- tests/gem5/asmtest/tests.py | 310 +++++++++++++----------- tests/gem5/configs/simple_binary_run.py | 61 ++++- 2 files changed, 211 insertions(+), 160 deletions(-) diff --git a/tests/gem5/asmtest/tests.py b/tests/gem5/asmtest/tests.py index b2a5992da0..0ddffb27cf 100644 --- a/tests/gem5/asmtest/tests.py +++ b/tests/gem5/asmtest/tests.py @@ -34,156 +34,159 @@ else: # The following lists the RISCV binaries. Those commented out presently result # in a test failure. This is outlined in the following Jira issue: # https://gem5.atlassian.net/browse/GEM5-496 -binaries = ( - "rv64samt-ps-sysclone_d", - "rv64samt-ps-sysfutex1_d", +binary_configs = ( + ("rv{}samt-ps-sysclone_d", (64,)), + ("rv{}samt-ps-sysfutex1_d", (64,)), # 'rv64samt-ps-sysfutex2_d', - "rv64samt-ps-sysfutex3_d", + ("rv{}samt-ps-sysfutex3_d", (64,)), # 'rv64samt-ps-sysfutex_d', - "rv64ua-ps-amoadd_d", - "rv64ua-ps-amoadd_w", - "rv64ua-ps-amoand_d", - "rv64ua-ps-amoand_w", - "rv64ua-ps-amomax_d", - "rv64ua-ps-amomax_w", - "rv64ua-ps-amomaxu_d", - "rv64ua-ps-amomaxu_w", - "rv64ua-ps-amomin_d", - "rv64ua-ps-amomin_w", - "rv64ua-ps-amominu_d", - "rv64ua-ps-amominu_w", - "rv64ua-ps-amoor_d", - "rv64ua-ps-amoor_w", - "rv64ua-ps-amoswap_d", - "rv64ua-ps-amoswap_w", - "rv64ua-ps-amoxor_d", - "rv64ua-ps-amoxor_w", - "rv64ua-ps-lrsc", - "rv64uamt-ps-amoadd_d", - "rv64uamt-ps-amoand_d", - "rv64uamt-ps-amomax_d", - "rv64uamt-ps-amomaxu_d", - "rv64uamt-ps-amomin_d", - "rv64uamt-ps-amominu_d", - "rv64uamt-ps-amoor_d", - "rv64uamt-ps-amoswap_d", - "rv64uamt-ps-amoxor_d", - "rv64uamt-ps-lrsc_d", - "rv64ud-ps-fadd", - "rv64ud-ps-fclass", - "rv64ud-ps-fcmp", - "rv64ud-ps-fcvt", - "rv64ud-ps-fcvt_w", - "rv64ud-ps-fdiv", - "rv64ud-ps-fmadd", - 
"rv64ud-ps-fmin", - "rv64ud-ps-ldst", - "rv64ud-ps-move", - "rv64ud-ps-recoding", - "rv64ud-ps-structural", - "rv64uf-ps-fadd", - "rv64uf-ps-fclass", - "rv64uf-ps-fcmp", - "rv64uf-ps-fcvt", - "rv64uf-ps-fcvt_w", - "rv64uf-ps-fdiv", - "rv64uf-ps-fmadd", - "rv64uf-ps-fmin", - "rv64uf-ps-ldst", - "rv64uf-ps-move", - "rv64uf-ps-recoding", - "rv64ui-ps-add", - "rv64ui-ps-addi", - "rv64ui-ps-addiw", - "rv64ui-ps-addw", - "rv64ui-ps-and", - "rv64ui-ps-andi", - "rv64ui-ps-auipc", - "rv64ui-ps-beq", - "rv64ui-ps-bge", - "rv64ui-ps-bgeu", - "rv64ui-ps-blt", - "rv64ui-ps-bltu", - "rv64ui-ps-bne", - "rv64ui-ps-fence_i", - "rv64ui-ps-jal", - "rv64ui-ps-jalr", - "rv64ui-ps-lb", - "rv64ui-ps-lbu", - "rv64ui-ps-ld", - "rv64ui-ps-lh", - "rv64ui-ps-lhu", - "rv64ui-ps-lui", - "rv64ui-ps-lw", - "rv64ui-ps-lwu", - "rv64ui-ps-or", - "rv64ui-ps-ori", - "rv64ui-ps-sb", - "rv64ui-ps-sd", - "rv64ui-ps-sh", - "rv64ui-ps-simple", - "rv64ui-ps-sll", - "rv64ui-ps-slli", - "rv64ui-ps-slliw", - "rv64ui-ps-sllw", - "rv64ui-ps-slt", - "rv64ui-ps-slti", - "rv64ui-ps-sltiu", - "rv64ui-ps-sltu", - "rv64ui-ps-sra", - "rv64ui-ps-srai", - "rv64ui-ps-sraiw", - "rv64ui-ps-sraw", - "rv64ui-ps-srl", - "rv64ui-ps-srli", - "rv64ui-ps-srliw", - "rv64ui-ps-srlw", - "rv64ui-ps-sub", - "rv64ui-ps-subw", - "rv64ui-ps-sw", - "rv64ui-ps-xor", - "rv64ui-ps-xori", - "rv64um-ps-div", - "rv64um-ps-divu", - "rv64um-ps-divuw", - "rv64um-ps-divw", - "rv64um-ps-mul", - "rv64um-ps-mulh", - "rv64um-ps-mulhsu", - "rv64um-ps-mulhu", - "rv64um-ps-mulw", - "rv64um-ps-rem", - "rv64um-ps-remu", - "rv64um-ps-remuw", - "rv64um-ps-remw", - "rv64uzfh-ps-fadd", - "rv64uzfh-ps-fclass", - "rv64uzfh-ps-fcmp", - "rv64uzfh-ps-fcvt", - "rv64uzfh-ps-fcvt_w", - "rv64uzfh-ps-fdiv", - "rv64uzfh-ps-fmadd", - "rv64uzfh-ps-fmin", - "rv64uzfh-ps-ldst", - "rv64uzfh-ps-move", - "rv64uzfh-ps-recoding", + ("rv{}ua-ps-amoadd_d", (64,)), + ("rv{}ua-ps-amoadd_w", (32, 64)), + ("rv{}ua-ps-amoand_d", (64,)), + ("rv{}ua-ps-amoand_w", (32, 64)), + 
("rv{}ua-ps-amomax_d", (64,)), + ("rv{}ua-ps-amomax_w", (32, 64)), + ("rv{}ua-ps-amomaxu_d", (64,)), + ("rv{}ua-ps-amomaxu_w", (32, 64)), + ("rv{}ua-ps-amomin_d", (64,)), + ("rv{}ua-ps-amomin_w", (32, 64)), + ("rv{}ua-ps-amominu_d", (64,)), + ("rv{}ua-ps-amominu_w", (32, 64)), + ("rv{}ua-ps-amoor_d", (64,)), + ("rv{}ua-ps-amoor_w", (32, 64)), + ("rv{}ua-ps-amoswap_d", (64,)), + ("rv{}ua-ps-amoswap_w", (32, 64)), + ("rv{}ua-ps-amoxor_d", (64,)), + ("rv{}ua-ps-amoxor_w", (32, 64)), + ("rv{}ua-ps-lrsc", (32, 64)), + ("rv{}uamt-ps-amoadd_d", (64,)), + ("rv{}uamt-ps-amoand_d", (64,)), + ("rv{}uamt-ps-amomax_d", (64,)), + ("rv{}uamt-ps-amomaxu_d", (64,)), + ("rv{}uamt-ps-amomin_d", (64,)), + ("rv{}uamt-ps-amominu_d", (64,)), + ("rv{}uamt-ps-amoor_d", (64,)), + ("rv{}uamt-ps-amoswap_d", (64,)), + ("rv{}uamt-ps-amoxor_d", (64,)), + ("rv{}uamt-ps-lrsc_d", (64,)), + ("rv{}uamt-ps-amoadd_w", (32,)), + ("rv{}uamt-ps-amoand_w", (32,)), + ("rv{}uamt-ps-amomax_w", (32,)), + ("rv{}uamt-ps-amomaxu_w", (32,)), + ("rv{}uamt-ps-amomin_w", (32,)), + ("rv{}uamt-ps-amominu_w", (32,)), + ("rv{}uamt-ps-amoor_w", (32,)), + ("rv{}uamt-ps-amoswap_w", (32,)), + ("rv{}uamt-ps-amoxor_w", (32,)), + ("rv{}uamt-ps-lrsc_w", (32,)), + ("rv{}ud-ps-fadd", (32, 64)), + ("rv{}ud-ps-fclass", (32, 64)), + ("rv{}ud-ps-fcmp", (32, 64)), + ("rv{}ud-ps-fcvt", (32, 64)), + ("rv{}ud-ps-fcvt_w", (32, 64)), + ("rv{}ud-ps-fdiv", (32, 64)), + ("rv{}ud-ps-fmadd", (32, 64)), + ("rv{}ud-ps-fmin", (32, 64)), + ("rv{}ud-ps-ldst", (32, 64)), + ("rv{}ud-ps-move", (64,)), + ("rv{}ud-ps-recoding", (32, 64)), + ("rv{}ud-ps-structural", (64,)), + ("rv{}uf-ps-fadd", (32, 64)), + ("rv{}uf-ps-fclass", (32, 64)), + ("rv{}uf-ps-fcmp", (32, 64)), + ("rv{}uf-ps-fcvt", (32, 64)), + ("rv{}uf-ps-fcvt_w", (32, 64)), + ("rv{}uf-ps-fdiv", (32, 64)), + ("rv{}uf-ps-fmadd", (32, 64)), + ("rv{}uf-ps-fmin", (32, 64)), + ("rv{}uf-ps-ldst", (32, 64)), + ("rv{}uf-ps-move", (32, 64)), + ("rv{}uf-ps-recoding", (32, 64)), + ("rv{}ui-ps-add", (32, 
64)), + ("rv{}ui-ps-addi", (32, 64)), + ("rv{}ui-ps-addiw", (64,)), + ("rv{}ui-ps-addw", (64,)), + ("rv{}ui-ps-and", (32, 64)), + ("rv{}ui-ps-andi", (32, 64)), + ("rv{}ui-ps-auipc", (32, 64)), + ("rv{}ui-ps-beq", (32, 64)), + ("rv{}ui-ps-bge", (32, 64)), + ("rv{}ui-ps-bgeu", (32, 64)), + ("rv{}ui-ps-blt", (32, 64)), + ("rv{}ui-ps-bltu", (32, 64)), + ("rv{}ui-ps-bne", (32, 64)), + ("rv{}ui-ps-fence_i", (32, 64)), + ("rv{}ui-ps-jal", (32, 64)), + ("rv{}ui-ps-jalr", (32, 64)), + ("rv{}ui-ps-lb", (32, 64)), + ("rv{}ui-ps-lbu", (32, 64)), + ("rv{}ui-ps-ld", (64,)), + ("rv{}ui-ps-lh", (32, 64)), + ("rv{}ui-ps-lhu", (32, 64)), + ("rv{}ui-ps-lui", (32, 64)), + ("rv{}ui-ps-lw", (32, 64)), + ("rv{}ui-ps-lwu", (64,)), + ("rv{}ui-ps-or", (32, 64)), + ("rv{}ui-ps-ori", (32, 64)), + ("rv{}ui-ps-sb", (32, 64)), + ("rv{}ui-ps-sd", (64,)), + ("rv{}ui-ps-sh", (32, 64)), + ("rv{}ui-ps-simple", (32, 64)), + ("rv{}ui-ps-sll", (32, 64)), + ("rv{}ui-ps-slli", (32, 64)), + ("rv{}ui-ps-slliw", (64,)), + ("rv{}ui-ps-sllw", (64,)), + ("rv{}ui-ps-slt", (32, 64)), + ("rv{}ui-ps-slti", (32, 64)), + ("rv{}ui-ps-sltiu", (32, 64)), + ("rv{}ui-ps-sltu", (32, 64)), + ("rv{}ui-ps-sra", (32, 64)), + ("rv{}ui-ps-srai", (32, 64)), + ("rv{}ui-ps-sraiw", (64,)), + ("rv{}ui-ps-sraw", (64,)), + ("rv{}ui-ps-srl", (32, 64)), + ("rv{}ui-ps-srli", (32, 64)), + ("rv{}ui-ps-srliw", (64,)), + ("rv{}ui-ps-srlw", (64,)), + ("rv{}ui-ps-sub", (32, 64)), + ("rv{}ui-ps-subw", (64,)), + ("rv{}ui-ps-sw", (32, 64)), + ("rv{}ui-ps-xor", (32, 64)), + ("rv{}ui-ps-xori", (32, 64)), + ("rv{}um-ps-div", (32, 64)), + ("rv{}um-ps-divu", (32, 64)), + ("rv{}um-ps-divuw", (64,)), + ("rv{}um-ps-divw", (64,)), + ("rv{}um-ps-mul", (32, 64)), + ("rv{}um-ps-mulh", (32, 64)), + ("rv{}um-ps-mulhsu", (32, 64)), + ("rv{}um-ps-mulhu", (32, 64)), + ("rv{}um-ps-mulw", (64,)), + ("rv{}um-ps-rem", (32, 64)), + ("rv{}um-ps-remu", (32, 64)), + ("rv{}um-ps-remuw", (64,)), + ("rv{}um-ps-remw", (64,)), + ("rv{}uzfh-ps-fadd", (32, 64)), + 
("rv{}uzfh-ps-fclass", (32, 64)), + ("rv{}uzfh-ps-fcmp", (32, 64)), + ("rv{}uzfh-ps-fcvt", (32, 64)), + ("rv{}uzfh-ps-fcvt_w", (32, 64)), + ("rv{}uzfh-ps-fdiv", (32, 64)), + ("rv{}uzfh-ps-fmadd", (32, 64)), + ("rv{}uzfh-ps-fmin", (32, 64)), + ("rv{}uzfh-ps-ldst", (32, 64)), + ("rv{}uzfh-ps-move", (32, 64)), + ("rv{}uzfh-ps-recoding", (32, 64)), ) cpu_types = ("atomic", "timing", "minor", "o3") for cpu_type in cpu_types: - for binary in binaries: - gem5_verify_config( - name=f"asm-riscv-{binary}-{cpu_type}", - verifiers=(), - config=joinpath( - config.base_dir, - "tests", - "gem5", - "configs", - "simple_binary_run.py", - ), - config_args=[ + for cfg in binary_configs: + template_bin, all_bits = cfg + for bits in all_bits: + binary = template_bin.format(bits) + config_args = [ binary, cpu_type, "riscv", @@ -191,7 +194,20 @@ for cpu_type in cpu_types: "4", "--resource-directory", resource_path, - ], - valid_isas=(constants.all_compiled_tag,), - valid_hosts=constants.supported_hosts, - ) + ] + if bits == 32: + config_args.extend(["-b", "--riscv-32bits"]) + gem5_verify_config( + name=f"asm-riscv-{binary}-{cpu_type}", + verifiers=(), + config=joinpath( + config.base_dir, + "tests", + "gem5", + "configs", + "simple_binary_run.py", + ), + config_args=config_args, + valid_isas=(constants.all_compiled_tag,), + valid_hosts=constants.supported_hosts, + ) diff --git a/tests/gem5/configs/simple_binary_run.py b/tests/gem5/configs/simple_binary_run.py index d69e1a1169..fbb0313f93 100644 --- a/tests/gem5/configs/simple_binary_run.py +++ b/tests/gem5/configs/simple_binary_run.py @@ -1,4 +1,5 @@ # Copyright (c) 2021 The Regents of the University of California +# Copyright (c) 2022 Google Inc # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -44,12 +45,23 @@ from gem5.components.processors.simple_core import SimpleCore from gem5.components.boards.mem_mode import MemMode from gem5.components.processors.cpu_types import CPUTypes from gem5.simulate.simulator import Simulator -from gem5.isas import get_isa_from_str, get_isas_str_set +from gem5.isas import get_isa_from_str, get_isas_str_set, ISA + +from m5.util import fatal import argparse +import importlib from python.gem5.components.processors.base_cpu_core import BaseCPUCore +cpu_types_string_map = { + CPUTypes.ATOMIC: "AtomicSimpleCPU", + CPUTypes.O3: "O3CPU", + CPUTypes.TIMING: "TimingSimpleCPU", + CPUTypes.KVM: "KvmCPU", + CPUTypes.MINOR: "MinorCPU", +} + parser = argparse.ArgumentParser( description="A gem5 script for running simple binaries in SE mode." ) @@ -73,6 +85,12 @@ parser.add_argument( help="Use the BaseCPUProcessor instead of the SimpleProcessor.", ) +parser.add_argument( + "--riscv-32bits", + action="store_true", + help="Use 32 bits core of Riscv CPU", +) + parser.add_argument( "-r", "--resource-directory", @@ -105,26 +123,43 @@ args = parser.parse_args() cache_hierarchy = NoCache() memory = SingleChannelDDR3_1600() +isa_enum = get_isa_from_str(args.isa) +cpu_enum = get_cpu_type_from_str(args.cpu) + +if isa_enum == ISA.RISCV and args.riscv_32bits and not args.base_cpu_processor: + fatal("To use Riscv 32 CPU, the base_cpu_processor must be specify!") + if args.base_cpu_processor: - cores = [ - BaseCPUCore( - core=SimpleCore.cpu_simobject_factory( - cpu_type=get_cpu_type_from_str(args.cpu), - isa=get_isa_from_str(args.isa), - core_id=i, - ), - isa=get_isa_from_str(args.isa), + + if isa_enum == ISA.RISCV and args.riscv_32bits: + m5_objects = importlib.import_module("m5.objects") + cpu_class = getattr( + m5_objects, f"Riscv32{cpu_types_string_map[cpu_enum]}" ) - for i in range(args.num_cores) - ] + cores = [ + BaseCPUCore(core=cpu_class(cpu_id=i), isa=isa_enum) + for i 
in range(args.num_cores) + ] + else: + cores = [ + BaseCPUCore( + core=SimpleCore.cpu_simobject_factory( + cpu_type=cpu_enum, + isa=isa_enum, + core_id=i, + ), + isa=isa_enum, + ) + for i in range(args.num_cores) + ] processor = BaseCPUProcessor( cores=cores, ) else: processor = SimpleProcessor( - cpu_type=get_cpu_type_from_str(args.cpu), - isa=get_isa_from_str(args.isa), + cpu_type=cpu_enum, + isa=isa_enum, num_cores=args.num_cores, ) From 5db889572a31ec49eae5a9d39e704955a1527041 Mon Sep 17 00:00:00 2001 From: Rocky Tatiefo Date: Thu, 29 Dec 2022 20:11:08 -0800 Subject: [PATCH 076/492] base: Remove unused output.hh dependency from trace.cc Change-Id: Ie80ad5f3fb9fc7ee1e35f0624317e0e58cbf152d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67011 Maintainer: Bobby Bruce Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/base/trace.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/base/trace.cc b/src/base/trace.cc index 52faa8d725..272b035e0f 100644 --- a/src/base/trace.cc +++ b/src/base/trace.cc @@ -38,7 +38,6 @@ #include "base/atomicio.hh" #include "base/logging.hh" -#include "base/output.hh" #include "base/str.hh" #include "debug/FmtFlag.hh" #include "debug/FmtStackTrace.hh" From 66d4a158207aba57ad06a524c8e9053745fd8e45 Mon Sep 17 00:00:00 2001 From: Vishnu Ramadas Date: Mon, 26 Dec 2022 19:14:11 -0600 Subject: [PATCH 077/492] gpu-compute,mem-ruby: Add support for GPU cache bypassing The GPU cache models do not support cache bypassing when the GLC or SLC AMDGPU instruction modifiers are used in a load or store. This commit adds cache bypass support by introducing new transitions in the coherence protocol used by the GPU memory system. Now, instructions with the GLC bit set will not cache in the L1 and instructions with SLC bit set will not cache in L1 or L2. 
Change-Id: Id29a47b0fa7e16a21a7718949db802f85e9897c3 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66991 Reviewed-by: Jason Lowe-Power Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/mem/packet.hh | 10 + src/mem/request.hh | 11 ++ src/mem/ruby/protocol/GPU_VIPER-TCC.sm | 178 +++++++++++++++++- src/mem/ruby/protocol/GPU_VIPER-TCP.sm | 45 ++++- src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm | 30 +++ src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm | 12 ++ src/mem/ruby/protocol/RubySlicc_MemControl.sm | 2 + src/mem/ruby/protocol/RubySlicc_Types.sm | 2 + src/mem/ruby/slicc_interface/RubyRequest.hh | 34 ++++ 9 files changed, 316 insertions(+), 8 deletions(-) diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 9238dbec00..a80b918798 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -1100,6 +1100,16 @@ class Packet : public Printable flags.set(VALID_SIZE); } + /** + * Accessor functions for the cache bypass flags. The cache bypass + * can specify which levels in the hierarchy to bypass. If GLC_BIT + * is set, the requests are globally coherent and bypass TCP. + * If SLC_BIT is set, then the requests are system level coherent + * and bypass both TCP and TCC. + */ + bool isGLCSet() const { return req->isGLCSet();} + bool isSLCSet() const { return req->isSLCSet();} + /** * Check if packet corresponds to a given block-aligned address and * address space. diff --git a/src/mem/request.hh b/src/mem/request.hh index 39d9d7281c..6a0cbc21d4 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -1071,6 +1071,17 @@ class Request bool isAcquire() const { return _cacheCoherenceFlags.isSet(ACQUIRE); } + + /** + * Accessor functions for the cache bypass flags. The cache bypass + * can specify which levels in the hierarchy to bypass. If GLC_BIT + * is set, the requests are globally coherent and bypass TCP. + * If SLC_BIT is set, then the requests are system level coherent + * and bypass both TCP and TCC. 
+ */ + bool isGLCSet() const {return _cacheCoherenceFlags.isSet(GLC_BIT); } + bool isSLCSet() const {return _cacheCoherenceFlags.isSet(SLC_BIT); } + /** * Accessor functions for the memory space configuration flags and used by * GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm index 032a64cec4..ae142471fa 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm @@ -56,8 +56,10 @@ machine(MachineType:TCC, "TCC Cache") enumeration(Event, desc="TCC Events") { // Requests coming from the Cores RdBlk, desc="RdBlk event"; + RdBypassEvict, desc="Bypass L2 on reads. Evict if cache block already allocated"; WrVicBlk, desc="L1 Write Through"; WrVicBlkBack, desc="L1 Write Through(dirty cache)"; + WrVicBlkEvict, desc="L1 Write Through(dirty cache) and evict"; Atomic, desc="Atomic Op"; AtomicDone, desc="AtomicOps Complete"; AtomicNotDone, desc="AtomicOps not Complete"; @@ -68,6 +70,7 @@ machine(MachineType:TCC, "TCC Cache") PrbInv, desc="Invalidating probe"; // Coming from Memory Controller WBAck, desc="writethrough ack from memory"; + Bypass, desc="Bypass the entire L2 cache"; } // STATES @@ -107,6 +110,8 @@ machine(MachineType:TCC, "TCC Cache") NetDest Destination, desc="Data destination"; int numAtomics, desc="number remaining atomics"; int atomicDoneCnt, desc="number AtomicDones triggered"; + bool isGLCSet, desc="Bypass L1 Cache"; + bool isSLCSet, desc="Bypass L1 and L2 Cache"; } structure(TBETable, external="yes") { @@ -173,7 +178,6 @@ machine(MachineType:TCC, "TCC Cache") int functionalWrite(Addr addr, Packet *pkt) { int num_functional_writes := 0; - TBE tbe := TBEs.lookup(addr); if(is_valid(tbe)) { num_functional_writes := num_functional_writes + @@ -279,7 +283,11 @@ machine(MachineType:TCC, "TCC Cache") peek(responseFromNB_in, ResponseMsg, block_on="addr") { TBE tbe := TBEs.lookup(in_msg.addr); Entry cache_entry := 
getCacheEntry(in_msg.addr); - if (in_msg.Type == CoherenceResponseType:NBSysResp) { + if (in_msg.isSLCSet) { + // If the SLC bit is set, the response needs to bypass the cache + // and should not be allocated an entry. + trigger(Event:Bypass, in_msg.addr, cache_entry, tbe); + } else if (in_msg.Type == CoherenceResponseType:NBSysResp) { if(presentOrAvail(in_msg.addr)) { trigger(Event:Data, in_msg.addr, cache_entry, tbe); } else { @@ -313,7 +321,18 @@ machine(MachineType:TCC, "TCC Cache") TBE tbe := TBEs.lookup(in_msg.addr); Entry cache_entry := getCacheEntry(in_msg.addr); if (in_msg.Type == CoherenceRequestType:WriteThrough) { - if(WB) { + if (in_msg.isSLCSet) { + // The request should bypass the cache if SLC bit is set. + // If the cache entry exists already, then evict it. + // Else, perform a normal cache access. + // The cache entry is allocated only on response and bypass is + // handled there + if(presentOrAvail(in_msg.addr)) { + trigger(Event:WrVicBlkEvict, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe); + } + } else if(WB) { if(presentOrAvail(in_msg.addr)) { trigger(Event:WrVicBlkBack, in_msg.addr, cache_entry, tbe); } else { @@ -326,7 +345,13 @@ machine(MachineType:TCC, "TCC Cache") } else if (in_msg.Type == CoherenceRequestType:Atomic) { trigger(Event:Atomic, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceRequestType:RdBlk) { - trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe); + if (in_msg.isSLCSet) { + // If SLC bit is set, the request needs to go directly to memory. + // If a cache block already exists, then evict it. 
+ trigger(Event:RdBypassEvict, in_msg.addr, cache_entry, tbe); + } else { + trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe); + } } else { DPRINTF(RubySlicc, "%s\n", in_msg); error("Unexpected Response Message to Core"); @@ -354,6 +379,8 @@ machine(MachineType:TCC, "TCC Cache") out_msg.MessageSize := MessageSizeType:Response_Data; out_msg.Dirty := false; out_msg.State := CoherenceState:Shared; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; DPRINTF(RubySlicc, "%s\n", out_msg); } } @@ -371,15 +398,46 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Dirty := false; out_msg.State := CoherenceState:Shared; DPRINTF(RubySlicc, "%s\n", out_msg); + peek(responseFromNB_in, ResponseMsg) { + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; + } } enqueue(unblockToNB_out, UnblockMsg, 1) { out_msg.addr := address; out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); out_msg.MessageSize := MessageSizeType:Unblock_Control; + peek(responseFromNB_in, ResponseMsg) { + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; + } DPRINTF(RubySlicc, "%s\n", out_msg); } } + action(rb_bypassDone, "rb", desc="bypass L2 of read access") { + peek(responseFromNB_in, ResponseMsg) { + enqueue(responseToCore_out, ResponseMsg, l2_response_latency) { + out_msg.addr := address; + out_msg.Type := CoherenceResponseType:TDSysResp; + out_msg.Sender := machineID; + out_msg.Destination := tbe.Destination; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.Dirty := false; + out_msg.State := CoherenceState:Shared; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; + DPRINTF(RubySlicc, "%s\n", out_msg); + } + enqueue(unblockToNB_out, UnblockMsg, 1) { + out_msg.addr := address; + out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); + out_msg.MessageSize := MessageSizeType:Unblock_Control; + 
DPRINTF(RubySlicc, "%s\n", out_msg); + } + } + } action(rd_requestData, "r", desc="Miss in L2, pass on") { if(tbe.Destination.count()==1){ @@ -391,6 +449,8 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); out_msg.Shared := false; // unneeded for this request out_msg.MessageSize := in_msg.MessageSize; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; DPRINTF(RubySlicc, "%s\n", out_msg); } } @@ -407,6 +467,9 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Control; out_msg.instSeqNum := in_msg.instSeqNum; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; + } } } @@ -421,6 +484,9 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Control; out_msg.instSeqNum := in_msg.instSeqNum; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; + } } } @@ -434,6 +500,9 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Sender := machineID; out_msg.MessageSize := in_msg.MessageSize; out_msg.DataBlk := in_msg.DataBlk; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; + } } } @@ -466,6 +535,8 @@ machine(MachineType:TCC, "TCC Cache") peek(coreRequestNetwork_in, CPURequestMsg) { if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){ tbe.Destination.add(in_msg.Requestor); + tbe.isGLCSet := in_msg.isGLCSet; + tbe.isSLCSet := in_msg.isSLCSet; } } } @@ -505,6 +576,8 @@ machine(MachineType:TCC, "TCC Cache") out_msg.DataBlk := in_msg.DataBlk; out_msg.writeMask.orMask(in_msg.writeMask); out_msg.instSeqNum := in_msg.instSeqNum; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; } } } @@ -520,6 +593,10 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Dirty := true; out_msg.DataBlk := cache_entry.DataBlk; 
out_msg.writeMask.orMask(cache_entry.writeMask); + peek(coreRequestNetwork_in, CPURequestMsg) { + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; + } } } @@ -534,6 +611,8 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Type := CoherenceRequestType:Atomic; out_msg.Dirty := true; out_msg.writeMask.orMask(in_msg.writeMask); + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; } } } @@ -549,6 +628,10 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Ntsl := true; out_msg.State := CoherenceState:NA; out_msg.MessageSize := MessageSizeType:Response_Control; + peek(probeNetwork_in, NBProbeRequestMsg) { + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; + } } } action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") { @@ -592,6 +675,10 @@ machine(MachineType:TCC, "TCC Cache") tbe.atomicDoneCnt := tbe.atomicDoneCnt + 1; out_msg.addr := address; out_msg.Type := TriggerType:AtomicDone; + peek(responseFromNB_in, ResponseMsg) { + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; + } } } } @@ -659,6 +746,54 @@ machine(MachineType:TCC, "TCC Cache") p_popRequestQueue; } + transition(I, RdBypassEvict) {TagArrayRead} { + p_profileMiss; + t_allocateTBE; + rd_requestData; + p_popRequestQueue; + } + +// Transition to be called when a read request with SLC flag set arrives at +// entry in state W. It evicts and invalidates the cache entry before +// forwarding the request to global memory + transition(W, RdBypassEvict, I) {TagArrayRead} { + p_profileMiss; + t_allocateTBE; + wb_writeBack; + i_invL2; + rd_requestData; + p_popRequestQueue; + } + +// Transition to be called when a read request with SLC flag set arrives at +// entry in state M. 
It evicts and invalidates the cache entry before +// forwarding the request to global memory + transition(M, RdBypassEvict, I) {TagArrayRead} { + p_profileMiss; + t_allocateTBE; + wb_writeBack; + i_invL2; + rd_requestData; + p_popRequestQueue; + } + +// Transition to be called when a read request with SLC flag set arrives at +// entry in state V. It invalidates the cache entry before forwarding the +// request to global memory. + transition(V, RdBypassEvict, I) {TagArrayRead} { + p_profileMiss; + t_allocateTBE; + i_invL2; + rd_requestData; + p_popRequestQueue; + } + +// Transition to be called when a read request with SLC flag arrives at entry +// in transient state. The request stalls until the pending transition is complete. + transition({WI, IV}, RdBypassEvict) { + st_stallAndWaitRequest; + } + transition(V, Atomic, A) {TagArrayRead} { p_profileHit; i_invL2; @@ -730,6 +865,31 @@ transition(I, Atomic, A) {TagArrayRead} { p_popRequestQueue; } +// Transition to be called when a write request with SLC bit set arrives at an +// entry with state V. The entry has to be evicted and invalidated before the +// request is forwarded to global memory + transition(V, WrVicBlkEvict, I) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + p_profileMiss; + ut_updateTag; + t_allocateTBE; + wt_writeThrough; + i_invL2; + p_popRequestQueue; + } + +// Transition to be called when a write request with SLC bit set arrives at an +// entry with state W. The entry has to be evicted and invalidated before the +// request is forwarded to global memory.
+ transition(W, WrVicBlkEvict, I) {TagArrayRead, TagArrayWrite, DataArrayWrite} { + p_profileMiss; + ut_updateTag; + wdb_writeDirtyBytes; + t_allocateTBE; + wb_writeBack; + i_invL2; + p_popRequestQueue; + } + transition({W, M}, L2_Repl, WI) {TagArrayRead, DataArrayRead} { t_allocateTBE; wb_writeBack; @@ -764,6 +924,16 @@ transition(I, Atomic, A) {TagArrayRead} { pp_popProbeQueue; } +// Transition to be called when the response for a request with SLC bit set +// arrives. The request has to be forwarded to the core that needs it while +// making sure no entry is allocated. + transition(I, Bypass, I) { + rb_bypassDone; + pr_popResponseQueue; + wada_wakeUpAllDependentsAddr; + dt_deallocateTBE; + } + transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite} { a_allocateBlock; ut_updateTag; diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm index 775a62b174..3be1397d49 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm @@ -60,6 +60,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") enumeration(Event, desc="TCP Events") { // Core initiated Load, desc="Load"; + LoadBypassEvict, desc="Bypass L1 on a load. Evict if cache block already allocated"; Store, desc="Store to L1 (L1 is dirty)"; StoreThrough, desc="Store directly to L2(L1 is clean)"; Atomic, desc="Atomic"; @@ -256,8 +257,10 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") Entry cache_entry := getCacheEntry(in_msg.addr); TBE tbe := TBEs.lookup(in_msg.addr); if (in_msg.Type == CoherenceResponseType:TDSysResp) { - // disable L1 cache - if (disableL1) { + if (disableL1 || in_msg.isGLCSet || in_msg.isSLCSet) { + // If L1 is disabled or requests have GLC or SLC flag set, + // then, the requests should not cache in the L1. 
The response + // from L2/global memory should bypass the cache trigger(Event:Bypass, in_msg.addr, cache_entry, tbe); } else { if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.addr)) { @@ -284,13 +287,23 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") TBE tbe := TBEs.lookup(in_msg.LineAddress); DPRINTF(RubySlicc, "%s\n", in_msg); if (in_msg.Type == RubyRequestType:LD) { - trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe); + if ((in_msg.isGLCSet || in_msg.isSLCSet) && is_valid(cache_entry)) { + // Read requests with GLC or SLC bit set should not cache in the L1. + // They need to bypass the L1 and go to the L2. If an entry exists + // in the L1, it needs to be evicted + trigger(Event:LoadBypassEvict, in_msg.LineAddress, cache_entry, tbe); + } + else { + trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe); + } } else if (in_msg.Type == RubyRequestType:ATOMIC || in_msg.Type == RubyRequestType:ATOMIC_RETURN || in_msg.Type == RubyRequestType:ATOMIC_NO_RETURN) { trigger(Event:Atomic, in_msg.LineAddress, cache_entry, tbe); } else if (in_msg.Type == RubyRequestType:ST) { - if(disableL1) { + if(disableL1 || in_msg.isGLCSet || in_msg.isSLCSet) { + // Write requests with GLC or SLC bit set, or when L1 is disabled, + // should not cache in the L1.
They need to perform a store through trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe); } else { if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) { @@ -330,6 +343,10 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") TCC_select_low_bit, TCC_select_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); + peek(mandatoryQueue_in, RubyRequest) { + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; + } } } @@ -375,6 +392,8 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") TCC_select_low_bit, TCC_select_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; } } } @@ -401,6 +420,8 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") // forward inst sequence number to lower TCC peek(mandatoryQueue_in, RubyRequest) { out_msg.instSeqNum := in_msg.instSeqNum; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; } } } @@ -418,6 +439,11 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") out_msg.Type := CoherenceRequestType:Atomic; out_msg.InitialRequestTime := curCycle(); out_msg.Shared := false; + peek(mandatoryQueue_in, RubyRequest) { + out_msg.instSeqNum := in_msg.instSeqNum; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; + } } } } @@ -583,6 +609,17 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") p_popMandatoryQueue; } +// Transition to be called when a load request with GLC or SLC flag set arrives +// at L1. This transition invalidates any existing entry and forwards the +// request to L2. 
+ transition(V, LoadBypassEvict, I) {TagArrayRead, TagArrayWrite} { + uu_profileDataMiss; + inv_invDone; + ic_invCache; + n_issueRdBlk; + p_popMandatoryQueue; +} + transition({V, I}, Atomic, A) {TagArrayRead, TagArrayWrite} { t_allocateTBE; mru_updateMRU; diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm index 3b38e3b1ff..57edef8f2b 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm @@ -161,6 +161,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") uint64_t probe_id, desc="probe id for lifetime profiling"; WriteMask writeMask, desc="outstanding write through mask"; int Len, desc="Length of memory request for DMA"; + bool isGLCSet, desc="Bypass L1 Cache"; + bool isSLCSet, desc="Bypass L1 and L2 Cache"; } structure(TBETable, external="yes") { @@ -483,6 +485,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; out_msg.OriginalResponder := tbe.LastSender; out_msg.L3Hit := tbe.L3Hit; + out_msg.isGLCSet := tbe.isGLCSet; + out_msg.isSLCSet := tbe.isSLCSet; DPRINTF(RubySlicc, "%s\n", out_msg); } } @@ -512,6 +516,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.ForwardRequestTime := tbe.ForwardRequestTime; out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; out_msg.OriginalResponder := tbe.LastSender; + out_msg.isGLCSet := tbe.isGLCSet; + out_msg.isSLCSet := tbe.isSLCSet; if(tbe.atomicData){ out_msg.WTRequestor := tbe.WTRequestor; } @@ -540,6 +546,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.InitialRequestTime := tbe.InitialRequestTime; out_msg.ForwardRequestTime := curCycle(); out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; + out_msg.isGLCSet := tbe.isGLCSet; + out_msg.isSLCSet := tbe.isSLCSet; DPRINTF(RubySlicc, "%s\n", out_msg); } } @@ -557,6 +565,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") 
out_msg.ForwardRequestTime := curCycle(); out_msg.ProbeRequestStartTime := curCycle(); out_msg.instSeqNum := in_msg.instSeqNum; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; } } } @@ -569,6 +579,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Data; out_msg.DataBlk := in_msg.DataBlk; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; } } } @@ -624,6 +636,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.Type := MemoryRequestType:MEMORY_READ; out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; } } } @@ -739,6 +753,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.MessageSize := MessageSizeType:Control; out_msg.Destination := probe_dests; tbe.NumPendingAcks := out_msg.Destination.count(); + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; DPRINTF(RubySlicc, "%s\n", out_msg); APPEND_TRANSITION_COMMENT(" dc: Acks remaining: "); APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); @@ -842,6 +858,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.ReturnData := true; out_msg.MessageSize := MessageSizeType:Control; out_msg.Destination := probe_dests; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; tbe.NumPendingAcks := out_msg.Destination.count(); DPRINTF(RubySlicc, "%s\n", (out_msg)); APPEND_TRANSITION_COMMENT(" sc: Acks remaining: "); @@ -897,6 +915,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.ReturnData := false; out_msg.MessageSize := MessageSizeType:Control; out_msg.Destination := probe_dests; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; tbe.NumPendingAcks := out_msg.Destination.count(); APPEND_TRANSITION_COMMENT(" ic: Acks remaining: "); 
APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); @@ -923,6 +943,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Data; out_msg.DataBlk := in_msg.DataBlk; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; } if (tbe.Dirty == false) { // have to update the TBE, too, because of how this @@ -985,6 +1007,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") tbe.NumPendingAcks := 0; tbe.Cached := in_msg.ForceShared; tbe.InitialRequestTime := in_msg.InitialRequestTime; + tbe.isGLCSet := in_msg.isGLCSet; + tbe.isSLCSet := in_msg.isSLCSet; } } @@ -1004,6 +1028,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Data; out_msg.DataBlk := tbe.DataBlk; + out_msg.isGLCSet := tbe.isGLCSet; + out_msg.isSLCSet := tbe.isSLCSet; DPRINTF(ProtocolTrace, "%s\n", out_msg); } } @@ -1104,6 +1130,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Data; out_msg.DataBlk := victim_entry.DataBlk; + out_msg.isGLCSet := in_msg.isGLCSet; + out_msg.isSLCSet := in_msg.isSLCSet; } L3CacheMemory.deallocate(victim); } @@ -1136,6 +1164,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Data; out_msg.DataBlk := victim_entry.DataBlk; + out_msg.isGLCSet := tbe.isGLCSet; + out_msg.isSLCSet := tbe.isSLCSet; } L3CacheMemory.deallocate(victim); } diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm index 46bab43c22..6ff19e953b 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm @@ -138,6 +138,9 @@ structure(CPURequestMsg, desc="...", interface="Message") { bool NoWriteConflict, default="true", desc="write collided with CAB 
entry"; int ProgramCounter, desc="PC that accesses to this block"; + bool isGLCSet, default="false", desc="GLC flag value in the request"; + bool isSLCSet, default="false", desc="SLC flag value in the request"; + bool functionalRead(Packet *pkt) { // Only PUTX messages contains the data block if (Type == CoherenceRequestType:VicDirty) { @@ -165,6 +168,8 @@ structure(NBProbeRequestMsg, desc="...", interface="Message") { MachineID Requestor, desc="Requestor id for 3-hop requests"; bool NoAckNeeded, default="false", desc="For short circuting acks"; int ProgramCounter, desc="PC that accesses to this block"; + bool isGLCSet, desc="Bypass L1 Cache"; + bool isSLCSet, desc="Bypass L1 and L2 Caches"; bool functionalRead(Packet *pkt) { return false; @@ -248,6 +253,9 @@ structure(ResponseMsg, desc="...", interface="Message") { int ProgramCounter, desc="PC that issues this request"; bool mispred, desc="tell TCP if the block should not be bypassed"; + bool isGLCSet, default="false", desc="GLC flag value in the request that triggered response"; + bool isSLCSet, default="false", desc="SLC flag value in the request that triggered response"; + bool functionalRead(Packet *pkt) { // Only PUTX messages contains the data block @@ -277,6 +285,8 @@ structure(UnblockMsg, desc="...", interface="Message") { bool wasValid, default="false", desc="Was block valid when evicted"; bool valid, default="false", desc="Is block valid"; bool validToInvalid, default="false", desc="Was block valid when evicted"; + bool isGLCSet, default="false", desc="GLC flag value in the request"; + bool isSLCSet, default="false", desc="SLC flag value in the request"; bool functionalRead(Packet *pkt) { return false; @@ -321,6 +331,8 @@ structure(TriggerMsg, desc="...", interface="Message") { TriggerType Type, desc="Type of trigger"; CacheId Dest, default="CacheId_NA", desc="Cache to invalidate"; int ProgramCounter, desc="PC that accesses to this block"; + bool isGLCSet, default="false", desc="GLC flag value in the 
request"; + bool isSLCSet, default="false", desc="SLC flag value in the request"; bool functionalRead(Packet *pkt) { return false; diff --git a/src/mem/ruby/protocol/RubySlicc_MemControl.sm b/src/mem/ruby/protocol/RubySlicc_MemControl.sm index e8517a4a07..012b169dea 100644 --- a/src/mem/ruby/protocol/RubySlicc_MemControl.sm +++ b/src/mem/ruby/protocol/RubySlicc_MemControl.sm @@ -74,6 +74,8 @@ structure(MemoryMsg, desc="...", interface="Message") { PrefetchBit Prefetch, desc="Is this a prefetch request"; bool ReadX, desc="Exclusive"; int Acks, desc="How many acks to expect"; + bool isGLCSet, desc="Bypass L1 Cache"; + bool isSLCSet, desc="Bypass L1 and L2 Caches"; bool functionalRead(Packet *pkt) { if ((MessageSize == MessageSizeType:Response_Data) || diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm index 8d76f78f76..8ba9d935ff 100644 --- a/src/mem/ruby/protocol/RubySlicc_Types.sm +++ b/src/mem/ruby/protocol/RubySlicc_Types.sm @@ -177,6 +177,8 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") { int htmTransactionUid, desc="Used to identify the unique HTM transaction that produced this request"; bool isTlbi, desc="Memory request is a TLB shootdown (invalidation) operation"; Addr tlbiTransactionUid, desc="Unique identifier of the TLB shootdown operation that produced this request"; + bool isGLCSet, default="false",desc="If flag is set, bypass GPU L1 cache"; + bool isSLCSet, default="false",desc="If flag is set, bypass GPU L1 and L2 caches"; RequestPtr getRequestPtr(); } diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh index 2345c224fb..89ce83451e 100644 --- a/src/mem/ruby/slicc_interface/RubyRequest.hh +++ b/src/mem/ruby/slicc_interface/RubyRequest.hh @@ -79,6 +79,11 @@ class RubyRequest : public Message bool m_isTlbi; // Should be uint64, but SLICC complains about casts Addr m_tlbiTransactionUid; + // GPU cache bypass flags. 
GLC bypasses L1 while SLC bypasses both L1 and + // L2 if set to true. They are set to false by default and they must be + // explicitly set to true in the program in order to bypass caches + bool m_isGLCSet; + bool m_isSLCSet; RubyRequest(Tick curTime, uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, @@ -99,6 +104,13 @@ class RubyRequest : public Message m_tlbiTransactionUid(0) { m_LineAddress = makeLineAddress(m_PhysicalAddress); + if (_pkt) { + m_isGLCSet = m_pkt->req->isGLCSet(); + m_isSLCSet = m_pkt->req->isSLCSet(); + } else { + m_isGLCSet = 0; + m_isSLCSet = 0; + } } /** RubyRequest for memory management commands */ @@ -120,6 +132,13 @@ class RubyRequest : public Message m_tlbiTransactionUid(0) { assert(m_pkt->req->isMemMgmt()); + if (_pkt) { + m_isGLCSet = m_pkt->req->isGLCSet(); + m_isSLCSet = m_pkt->req->isSLCSet(); + } else { + m_isGLCSet = 0; + m_isSLCSet = 0; + } } RubyRequest(Tick curTime, uint64_t _paddr, int _len, @@ -148,6 +167,13 @@ class RubyRequest : public Message m_tlbiTransactionUid(0) { m_LineAddress = makeLineAddress(m_PhysicalAddress); + if (_pkt) { + m_isGLCSet = m_pkt->req->isGLCSet(); + m_isSLCSet = m_pkt->req->isSLCSet(); + } else { + m_isGLCSet = 0; + m_isSLCSet = 0; + } } RubyRequest(Tick curTime, uint64_t _paddr, int _len, @@ -177,6 +203,14 @@ class RubyRequest : public Message m_tlbiTransactionUid(0) { m_LineAddress = makeLineAddress(m_PhysicalAddress); + if (_pkt) { + m_isGLCSet = m_pkt->req->isGLCSet(); + m_isSLCSet = m_pkt->req->isSLCSet(); + + } else { + m_isGLCSet = 0; + m_isSLCSet = 0; + } } RubyRequest(Tick curTime) : Message(curTime) {} From 28a871b0374fe6a38fa1738570ebc139e950cf7e Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Fri, 30 Dec 2022 20:11:43 +0000 Subject: [PATCH 078/492] scons: Re-add -Werror for gem5 develop branch This is removed from the stable branch to avoid build errors but should be included on the develop branch to aid developers.
This reverts commit 7dd61c865975862b099e1af5e867083ac9307d9b. Change-Id: I1fe249ce87aa8d70c1f092fc7db1554e6aee7355 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67052 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- SConstruct | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/SConstruct b/SConstruct index e8107ea2c7..bd26e4552e 100755 --- a/SConstruct +++ b/SConstruct @@ -420,6 +420,14 @@ for variant_path in variant_paths: conf.CheckLinkFlag('-Wl,--threads') conf.CheckLinkFlag( '-Wl,--thread-count=%d' % GetOption('num_jobs')) + + # Treat warnings as errors but white list some warnings that we + # want to allow (e.g., deprecation warnings). + env.Append(CCFLAGS=['-Werror', + '-Wno-error=deprecated-declarations', + '-Wno-error=deprecated', + ]) + else: error('\n'.join(( "Don't know what compiler options to use for your compiler.", From bbeec2d758606470436d972b0ac8d04484000fa8 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Fri, 30 Dec 2022 20:28:55 +0000 Subject: [PATCH 079/492] misc: Update version info for develop branch Change-Id: Icd409acda0e88852938b2af9f170e2a410e91f8c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67053 Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce Tested-by: kokoro --- ext/sst/README.md | 4 +- ext/testlib/configuration.py | 2 +- src/Doxyfile | 2 +- src/base/version.cc | 2 +- src/python/gem5/resources/downloader.py | 2 +- tests/compiler-tests.sh | 2 +- tests/jenkins/presubmit.sh | 4 +- tests/nightly.sh | 2 +- tests/weekly.sh | 2 +- util/dockerfiles/docker-compose.yaml | 50 ++++++++++++------------- util/dockerfiles/gcn-gpu/Dockerfile | 2 +- 11 files changed, 37 insertions(+), 37 deletions(-) diff --git a/ext/sst/README.md b/ext/sst/README.md index 49f56349bb..1f37cb4c44 100644 --- a/ext/sst/README.md +++ b/ext/sst/README.md @@ -62,7 +62,7 @@ See `INSTALL.md`. 
Downloading the built bootloader containing a Linux Kernel and a workload, ```sh -wget http://dist.gem5.org/dist/v22-1/misc/riscv/bbl-busybox-boot-exit +wget http://dist.gem5.org/dist/develop/misc/riscv/bbl-busybox-boot-exit ``` Running the simulation @@ -87,7 +87,7 @@ extract them under the $M5_PATH directory (make sure M5_PATH points to a valid directory): ```sh -wget http://dist.gem5.org/dist/v22-1/arm/aarch-sst-20211207.tar.bz2 +wget http://dist.gem5.org/dist/develop/arm/aarch-sst-20211207.tar.bz2 tar -xf aarch-sst-20211207.tar.bz2 # copying bootloaders diff --git a/ext/testlib/configuration.py b/ext/testlib/configuration.py index 97c637687d..fd47e3b33a 100644 --- a/ext/testlib/configuration.py +++ b/ext/testlib/configuration.py @@ -213,7 +213,7 @@ def define_defaults(defaults): os.pardir, os.pardir)) defaults.result_path = os.path.join(os.getcwd(), 'testing-results') - defaults.resource_url = 'http://dist.gem5.org/dist/v22-1' + defaults.resource_url = 'http://dist.gem5.org/dist/develop' defaults.resource_path = os.path.abspath(os.path.join(defaults.base_dir, 'tests', 'gem5', diff --git a/src/Doxyfile b/src/Doxyfile index 4d14b7ccb7..24d70bbc55 100644 --- a/src/Doxyfile +++ b/src/Doxyfile @@ -31,7 +31,7 @@ PROJECT_NAME = gem5 # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = v22.1.0.0 +PROJECT_NUMBER = [DEVELOP-FOR-23.0] # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. 
diff --git a/src/base/version.cc b/src/base/version.cc index 050aea091f..8131a3197e 100644 --- a/src/base/version.cc +++ b/src/base/version.cc @@ -32,6 +32,6 @@ namespace gem5 /** * @ingroup api_base_utils */ -const char *gem5Version = "22.1.0.0"; +const char *gem5Version = "[DEVELOP-FOR-23.0]"; } // namespace gem5 diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py index f619b9771d..1fda8d86b6 100644 --- a/src/python/gem5/resources/downloader.py +++ b/src/python/gem5/resources/downloader.py @@ -55,7 +55,7 @@ def _resources_json_version_required() -> str: """ Specifies the version of resources.json to obtain. """ - return "22.1" + return "develop" def _get_resources_json_uri() -> str: diff --git a/tests/compiler-tests.sh b/tests/compiler-tests.sh index 044ceb2adc..f5d4bb189f 100755 --- a/tests/compiler-tests.sh +++ b/tests/compiler-tests.sh @@ -114,7 +114,7 @@ for compiler in ${images[@]}; do # targets for this test build_indices=(${build_permutation[@]:0:$builds_count}) - repo_name="${base_url}/${compiler}:v22-1" + repo_name="${base_url}/${compiler}:latest" # Grab compiler image docker pull $repo_name >/dev/null diff --git a/tests/jenkins/presubmit.sh b/tests/jenkins/presubmit.sh index 36da3facd1..91eb95f81b 100755 --- a/tests/jenkins/presubmit.sh +++ b/tests/jenkins/presubmit.sh @@ -37,8 +37,8 @@ set -e -DOCKER_IMAGE_ALL_DEP=gcr.io/gem5-test/ubuntu-22.04_all-dependencies:v22-1 -DOCKER_IMAGE_CLANG_COMPILE=gcr.io/gem5-test/clang-version-14:v22-1 +DOCKER_IMAGE_ALL_DEP=gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest +DOCKER_IMAGE_CLANG_COMPILE=gcr.io/gem5-test/clang-version-14:latest PRESUBMIT_STAGE2=tests/jenkins/presubmit-stage2.sh GEM5ART_TESTS=tests/jenkins/gem5art-tests.sh diff --git a/tests/nightly.sh b/tests/nightly.sh index bf05154fe7..1360c4435c 100755 --- a/tests/nightly.sh +++ b/tests/nightly.sh @@ -37,7 +37,7 @@ docker_mem_limit="18g" # The docker tag to use (varies between develop, and versions on the 
staging # branch) -tag="v22-1" +tag="latest" # The first argument is the number of threads to be used for compilation. If no # argument is given we default to one. diff --git a/tests/weekly.sh b/tests/weekly.sh index 9b400b9a83..c7f834b7a5 100755 --- a/tests/weekly.sh +++ b/tests/weekly.sh @@ -37,7 +37,7 @@ docker_mem_limit="24g" # The docker tag to use (varies between develop, and versions on the staging # branch) -tag="v22-1" +tag="latest" # We assume the first two arguments are the number of threads followed by the # GPU ISA to test. These default to 1 and GCN3_X86 is no argument is given. diff --git a/util/dockerfiles/docker-compose.yaml b/util/dockerfiles/docker-compose.yaml index 004052283a..39579962b1 100644 --- a/util/dockerfiles/docker-compose.yaml +++ b/util/dockerfiles/docker-compose.yaml @@ -5,154 +5,154 @@ services: build: context: gcn-gpu dockerfile: Dockerfile - image: gcr.io/gem5-test/gcn-gpu:v22-1 + image: gcr.io/gem5-test/gcn-gpu:latest gpu-fs: build: context: gpu-fs dockerfile: Dockerfile - image: gcr.io/gem5-test/gpu-fs:v22-1 + image: gcr.io/gem5-test/gpu-fs:latest sst: build: context: sst-11.1.0 dockerfile: Dockerfile - image: gcr.io/gem5-test/sst-env:v22-1 + image: gcr.io/gem5-test/sst-env:latest systemc: build: context: systemc-2.3.3 dockerfile: Dockerfile - image: gcr.io/gem5-test/systemc-env:v22-1 + image: gcr.io/gem5-test/systemc-env:latest ubuntu-18.04_all-dependencies: build: context: ubuntu-18.04_all-dependencies dockerfile: Dockerfile - image: gcr.io/gem5-test/ubuntu-18.04_all-dependencies:v22-1 + image: gcr.io/gem5-test/ubuntu-18.04_all-dependencies:latest ubuntu-20.04_all-dependencies: build: context: ubuntu-20.04_all-dependencies dockerfile: Dockerfile - image: gcr.io/gem5-test/ubuntu-20.04_all-dependencies:v22-1 + image: gcr.io/gem5-test/ubuntu-20.04_all-dependencies:latest ubuntu-22.04_all-dependencies: build: context: ubuntu-22.04_all-dependencies dockerfile: Dockerfile - image: 
gcr.io/gem5-test/ubuntu-22.04_all-dependencies:v22-1 + image: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest ubuntu-22.04_min-dependencies: build: context: ubuntu-22.04_min-dependencies dockerfile: Dockerfile - image: gcr.io/gem5-test/ubuntu-22.04_min-dependencies:v22-1 + image: gcr.io/gem5-test/ubuntu-22.04_min-dependencies:latest gcc-7: build: context: ubuntu-18.04_gcc-version dockerfile: Dockerfile args: - version=7 - image: gcr.io/gem5-test/gcc-version-7:v22-1 + image: gcr.io/gem5-test/gcc-version-7:latest gcc-8: build: context: ubuntu-18.04_gcc-version dockerfile: Dockerfile args: - version=8 - image: gcr.io/gem5-test/gcc-version-8:v22-1 + image: gcr.io/gem5-test/gcc-version-8:latest gcc-9: build: context: ubuntu-20.04_gcc-version dockerfile: Dockerfile args: - version=9 - image: gcr.io/gem5-test/gcc-version-9:v22-1 + image: gcr.io/gem5-test/gcc-version-9:latest gcc-10: build: context: ubuntu-20.04_gcc-version dockerfile: Dockerfile args: - version=10 - image: gcr.io/gem5-test/gcc-version-10:v22-1 + image: gcr.io/gem5-test/gcc-version-10:latest gcc-11: build: context: ubuntu-22.04_gcc-version dockerfile: Dockerfile args: - version=11 - image: gcr.io/gem5-test/gcc-version-11:v22-1 + image: gcr.io/gem5-test/gcc-version-11:latest gcc-12: build: context: ubuntu-22.04_gcc-version dockerfile: Dockerfile args: - version=12 - image: gcr.io/gem5-test/gcc-version-12:v22-1 + image: gcr.io/gem5-test/gcc-version-12:latest clang-6: build: context: ubuntu-18.04_clang-version dockerfile: Dockerfile args: - version=6.0 - image: gcr.io/gem5-test/clang-version-6.0:v22-1 + image: gcr.io/gem5-test/clang-version-6.0:latest clang-7: build: context: ubuntu-18.04_clang-version dockerfile: Dockerfile args: - version=7 - image: gcr.io/gem5-test/clang-version-7:v22-1 + image: gcr.io/gem5-test/clang-version-7:latest clang-8: build: context: ubuntu-18.04_clang-version dockerfile: Dockerfile args: - version=8 - image: gcr.io/gem5-test/clang-version-8:v22-1 + image: 
gcr.io/gem5-test/clang-version-8:latest clang-9: build: context: ubuntu-18.04_clang-version dockerfile: Dockerfile args: - version=9 - image: gcr.io/gem5-test/clang-version-9:v22-1 + image: gcr.io/gem5-test/clang-version-9:latest clang-10: build: context: ubuntu-20.04_clang-version dockerfile: Dockerfile args: - version=10 - image: gcr.io/gem5-test/clang-version-10:v22-1 + image: gcr.io/gem5-test/clang-version-10:latest clang-11: build: context: ubuntu-20.04_clang-version dockerfile: Dockerfile args: - version=11 - image: gcr.io/gem5-test/clang-version-11:v22-1 + image: gcr.io/gem5-test/clang-version-11:latest clang-12: build: context: ubuntu-20.04_clang-version dockerfile: Dockerfile args: - version=12 - image: gcr.io/gem5-test/clang-version-12:v22-1 + image: gcr.io/gem5-test/clang-version-12:latest clang-13: build: context: ubuntu-22.04_clang-version dockerfile: Dockerfile args: - version=13 - image: gcr.io/gem5-test/clang-version-13:v22-1 + image: gcr.io/gem5-test/clang-version-13:latest clang-14: build: context: ubuntu-22.04_clang-version dockerfile: Dockerfile args: - version=14 - image: gcr.io/gem5-test/clang-version-14:v22-1 + image: gcr.io/gem5-test/clang-version-14:latest llvm-gnu-cross-compiler-riscv64: build: context: llvm-gnu-cross-compiler-riscv64 dockerfile: Dockerfile - image: gcr.io/gem5-test/llvm-gnu-cross-compiler-riscv64:v22-1 + image: gcr.io/gem5-test/llvm-gnu-cross-compiler-riscv64:latest gem5-all-min-dependencies: build: context: gem5-all-min-dependencies dockerfile: Dockerfile - image: gcr.io/gem5-test/gem5-all-min-dependencies:v22-1 + image: gcr.io/gem5-test/gem5-all-min-dependencies:latest diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile index dfff455079..c5db8963a8 100644 --- a/util/dockerfiles/gcn-gpu/Dockerfile +++ b/util/dockerfiles/gcn-gpu/Dockerfile @@ -69,7 +69,7 @@ RUN git clone -b rocm-4.0.0 \ WORKDIR /ROCclr # The patch allows us to avoid building blit kernels on-the-fly in gem5 -RUN wget -q 
-O - dist.gem5.org/dist/v22-1/rocm_patches/ROCclr.patch | git apply -v +RUN wget -q -O - dist.gem5.org/dist/develop/rocm_patches/ROCclr.patch | git apply -v WORKDIR /ROCclr/build RUN cmake -DOPENCL_DIR="/ROCm-OpenCL-Runtime" \ From f99a3c1f96bb4a56cbb9b85d52829d606411649f Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Fri, 16 Dec 2022 13:23:01 -0800 Subject: [PATCH 080/492] arch-vega: Fix signed BFE instructions The bitfield extract instructions come in unsigned and signed variants. The documentation on this is not correct, however the GCN3 documentation gives some clues. The instruction should extract an N-bit integer where N is defined in a source operand starting at some bit also defined by a source operand. For signed variants of this instruction, the N-bit integer should be sign extended but is currently not. This changeset does sign extension using the runtime value of N by ORing the upper bits with ones if the most significant bit is one. This was verified by writing these instructions in assembly and running on a real GPU. Changes are made to v_bfe_i32, s_bfe_i32, and s_bfe_i64. Change-Id: Ia192f5940200c6de48867b02f709a7f1b2daa974 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66751 Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/arch/amdgpu/vega/insts/instructions.cc | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index f5b08b7ce1..c9e57bc2f7 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -1302,6 +1302,21 @@ namespace VegaISA sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)) & ((1 << bits(src1.rawData(), 22, 16)) - 1); + + // Above extracted a signed int of size src1[22:16] bits which needs + // to be signed-extended. Check if the MSB of our src1[22:16]-bit + // integer is 1, and sign extend it is. 
+ // + // Note: The description in the Vega ISA manual does not mention to + // sign-extend the result. An update description can be found in the + // more recent RDNA3 manual here: + // https://developer.amd.com/wp-content/resources/ + // RDNA3_Shader_ISA_December2022.pdf + if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) { + sdst = sdst.rawData() + | (0xffffffff << bits(src1.rawData(), 22, 16)); + } + scc = sdst.rawData() ? 1 : 0; sdst.write(); @@ -1373,6 +1388,14 @@ namespace VegaISA sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)) & ((1 << bits(src1.rawData(), 22, 16)) - 1); + + // Above extracted a signed int of size src1[22:16] bits which needs + // to be signed-extended. Check if the MSB of our src1[22:16]-bit + // integer is 1, and sign extend it is. + if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) { + sdst = sdst.rawData() + | 0xffffffffffffffff << bits(src1.rawData(), 22, 16); + } scc = sdst.rawData() ? 1 : 0; sdst.write(); @@ -30544,6 +30567,13 @@ namespace VegaISA if (wf->execMask(lane)) { vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0)) & ((1 << bits(src2[lane], 4, 0)) - 1); + + // Above extracted a signed int of size src2 bits which needs + // to be signed-extended. Check if the MSB of our src2-bit + // integer is 1, and sign extend it is. 
+ if (vdst[lane] >> (bits(src2[lane], 4, 0) - 1)) { + vdst[lane] |= 0xffffffff << bits(src2[lane], 4, 0); + } } } From bbdebc25daec686cc06e159c28de005ca5875f5b Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Fri, 16 Dec 2022 13:35:02 -0800 Subject: [PATCH 081/492] arch-vega: Fix several issues with DPP DPP processing has several issues which are fixed in this changeset: 1) Incorrect comment is updated 2) newLane calculation for shift/rotate instructions is corrected 3) A copy of original data is made so that a copy of a copy is not made 4) Reset all booleans (OOB, zeroSrc, laneDisabled) after each lane iteration The shift, rotate, and broadcast variants were tested by implementing them in assembly and running on silicon. Change-Id: If86fbb26c87eaca4ef0587fd846978115858b168 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66752 Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/arch/amdgpu/vega/insts/inst_util.hh | 57 +++++++++++++++---------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/inst_util.hh b/src/arch/amdgpu/vega/insts/inst_util.hh index 01925f9d07..7ec2e2ddd3 100644 --- a/src/arch/amdgpu/vega/insts/inst_util.hh +++ b/src/arch/amdgpu/vega/insts/inst_util.hh @@ -303,9 +303,9 @@ namespace VegaISA * Currently the values are: * 0x0 - 0xFF: full permute of four threads * 0x100: reserved - * 0x101 - 0x10F: row shift right by 1-15 threads + * 0x101 - 0x10F: row shift left by 1-15 threads * 0x111 - 0x11F: row shift right by 1-15 threads - * 0x121 - 0x12F: row shift right by 1-15 threads + * 0x121 - 0x12F: row rotate right by 1-15 threads * 0x130: wavefront left shift by 1 thread * 0x134: wavefront left rotate by 1 thread * 0x138: wavefront right shift by 1 thread @@ -322,7 +322,8 @@ namespace VegaISA // newLane will be the same as the input lane unless swizzling happens int newLane = currLane; // for shift/rotate permutations; positive values are LEFT rotates - int count 
= 1; + // shift/rotate left means lane n -> lane n-1 (e.g., lane 1 -> lane 0) + int count = 0; int localRowOffset = rowOffset; int localRowNum = rowNum; @@ -335,51 +336,47 @@ namespace VegaISA panic("ERROR: instruction using reserved DPP_CTRL value\n"); } else if ((dppCtrl >= SQ_DPP_ROW_SL1) && (dppCtrl <= SQ_DPP_ROW_SL15)) { // DPP_ROW_SL{1:15} - count -= (dppCtrl - SQ_DPP_ROW_SL1 + 1); + count = (dppCtrl - SQ_DPP_ROW_SL1 + 1); if ((localRowOffset + count >= 0) && (localRowOffset + count < ROW_SIZE)) { localRowOffset += count; - newLane = (rowNum | localRowOffset); + newLane = ((rowNum * ROW_SIZE) | localRowOffset); } else { outOfBounds = true; } } else if ((dppCtrl >= SQ_DPP_ROW_SR1) && (dppCtrl <= SQ_DPP_ROW_SR15)) { // DPP_ROW_SR{1:15} - count -= (dppCtrl - SQ_DPP_ROW_SR1 + 1); + count = -(dppCtrl - SQ_DPP_ROW_SR1 + 1); if ((localRowOffset + count >= 0) && (localRowOffset + count < ROW_SIZE)) { localRowOffset += count; - newLane = (rowNum | localRowOffset); + newLane = ((rowNum * ROW_SIZE) | localRowOffset); } else { outOfBounds = true; } } else if ((dppCtrl >= SQ_DPP_ROW_RR1) && (dppCtrl <= SQ_DPP_ROW_RR15)) { // DPP_ROW_RR{1:15} - count -= (dppCtrl - SQ_DPP_ROW_RR1 + 1); + count = -(dppCtrl - SQ_DPP_ROW_RR1 + 1); localRowOffset = (localRowOffset + count + ROW_SIZE) % ROW_SIZE; - newLane = (rowNum | localRowOffset); + newLane = ((rowNum * ROW_SIZE) | localRowOffset); } else if (dppCtrl == SQ_DPP_WF_SL1) { // DPP_WF_SL1 - count = 1; if ((currLane >= 0) && (currLane < NumVecElemPerVecReg)) { - newLane += count; + newLane += 1; } else { outOfBounds = true; } } else if (dppCtrl == SQ_DPP_WF_RL1) { // DPP_WF_RL1 - count = 1; - newLane = (currLane + count + NumVecElemPerVecReg) % + newLane = (currLane - 1 + NumVecElemPerVecReg) % NumVecElemPerVecReg; } else if (dppCtrl == SQ_DPP_WF_SR1) { // DPP_WF_SR1 - count = -1; - int currVal = (currLane + count); + int currVal = (currLane - 1); if ((currVal >= 0) && (currVal < NumVecElemPerVecReg)) { - newLane += count; + 
newLane -= 1; } else { outOfBounds = true; } } else if (dppCtrl == SQ_DPP_WF_RR1) { // DPP_WF_RR1 - count = -1; - newLane = (currLane + count + NumVecElemPerVecReg) % + newLane = (currLane - 1 + NumVecElemPerVecReg) % NumVecElemPerVecReg; } else if (dppCtrl == SQ_DPP_ROW_MIRROR) { // DPP_ROW_MIRROR localRowOffset = (15 - localRowOffset); @@ -392,12 +389,22 @@ namespace VegaISA } else if (dppCtrl == SQ_DPP_ROW_BCAST15) { // DPP_ROW_BCAST15 count = 15; if (currLane > count) { - newLane = (currLane & ~count) - 1; + // 0x30 selects which set of 16 lanes to use. We broadcast the + // last lane of one set to all lanes of the next set (e.g., + // lane 15 is written to 16-31, 31 to 32-47, 47 to 48-63). + newLane = (currLane & 0x30) - 1; + } else { + outOfBounds = true; } } else if (dppCtrl == SQ_DPP_ROW_BCAST31) { // DPP_ROW_BCAST31 count = 31; if (currLane > count) { - newLane = (currLane & ~count) - 1; + // 0x20 selects either the upper 32 or lower 32 lanes and + // broadcasts the last lane of one set to all lanes of the + // next set (e.g., lane 31 is written to 32-63). 
+ newLane = (currLane & 0x20) - 1; + } else { + outOfBounds = true; } } else { panic("Unimplemented DPP control operation: %d\n", dppCtrl); @@ -443,6 +450,9 @@ namespace VegaISA src0.absModifier(); } + // Need a copy of the original data since we update one lane at a time + T src0_copy = src0; + // iterate over all register lanes, performing steps 2-4 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { threadValid = (0x1LL << lane); @@ -458,7 +468,6 @@ namespace VegaISA if (((rowMask & (0x1 << rowNum)) == 0) /* row mask */ || ((bankMask & (0x1 << bankNum)) == 0) /* bank mask */) { laneDisabled = true; - continue; } /** @@ -495,7 +504,7 @@ namespace VegaISA } else { threadValid = 0; } - } else if (!gpuDynInst->exec_mask[lane]) { + } else if (!gpuDynInst->wavefront()->execMask(lane)) { if (boundCtrl == 1) { zeroSrc = true; } else { @@ -505,13 +514,15 @@ namespace VegaISA if (threadValid != 0 && !outOfBounds && !zeroSrc) { assert(!laneDisabled); - src0[outLane] = src0[lane]; + src0[lane] = src0_copy[outLane]; } else if (zeroSrc) { src0[lane] = 0; } // reset for next iteration laneDisabled = false; + outOfBounds = false; + zeroSrc = false; } } From 64496338278f337e38190af97b9208bb9882495a Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Fri, 16 Dec 2022 13:39:24 -0800 Subject: [PATCH 082/492] arch-vega: Add DPP support for V_AND_B32 A DPP variant of V_AND_B32 was found in rocPRIM. With this changeset the unit tests for rocPRIM scan_inclusive are passing. 
Change-Id: I5a65f2cf6b56ac13609b191e3b3dfeb55e630942 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66753 Tested-by: kokoro Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair --- src/arch/amdgpu/vega/insts/instructions.cc | 34 +++++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index c9e57bc2f7..1f37ff14cc 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -6844,15 +6844,41 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 src1(gpuDynInst, instData.VSRC1); VecOperandU32 vdst(gpuDynInst, instData.VDST); src0.readSrc(); src1.read(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] & src1[lane]; + if (isDPPInst()) { + VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); + src0_dpp.read(); + + DPRINTF(VEGA, "Handling V_AND_B32 SRC DPP. 
SRC0: register v[%d], " + "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " + "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " + "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, + extData.iFmt_VOP_DPP.DPP_CTRL, + extData.iFmt_VOP_DPP.SRC0_ABS, + extData.iFmt_VOP_DPP.SRC0_NEG, + extData.iFmt_VOP_DPP.SRC1_ABS, + extData.iFmt_VOP_DPP.SRC1_NEG, + extData.iFmt_VOP_DPP.BC, + extData.iFmt_VOP_DPP.BANK_MASK, + extData.iFmt_VOP_DPP.ROW_MASK); + + processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0_dpp[lane] & src1[lane]; + } + } + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] & src1[lane]; + } } } From e392603d8126f8d7de0c31ae366dc668d90868ab Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Mon, 26 Dec 2022 09:08:25 -0800 Subject: [PATCH 083/492] arch-vega: Add missing operand size for ds_write2st64_b64 This instruction takes three operands (an address and two data values), but operand sizes were provided for only two of them, tripping the assert in the default case.
Change-Id: I3f505b6432aee5f3f265acac46b83c0c7daff3e7 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67071 Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/arch/amdgpu/vega/insts/instructions.hh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index 0671df8452..1c422481de 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -33553,7 +33553,9 @@ namespace VegaISA switch (opIdx) { case 0: //vgpr_a return 4; - case 1: //vgpr_d1 + case 1: //vgpr_d0 + return 8; + case 2: //vgpr_d1 return 8; default: fatal("op idx %i out of bounds\n", opIdx); From 022a48f9f6e97d04a0f2ed955a174a0791ca2549 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Mon, 26 Dec 2022 09:11:14 -0800 Subject: [PATCH 084/492] arch-vega: Implement ds_add_u32 atomic This instruction atomically adds unsigned 32-bit data from a VGPR to a value in LDS, without returning the previous value.
Change-Id: I87579a94f6200a9a066f8f7390e57fb5fb6eff8e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67072 Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/arch/amdgpu/vega/insts/instructions.cc | 49 ++++++++++++++++++++-- src/arch/amdgpu/vega/insts/instructions.hh | 2 + 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index 1f37ff14cc..afdfde3855 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -34071,6 +34071,10 @@ namespace VegaISA Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt) : Inst_DS(iFmt, "ds_add_u32") { + setFlag(MemoryRef); + setFlag(GroupSegment); + setFlag(AtomicAdd); + setFlag(AtomicNoReturn); } // Inst_DS__DS_ADD_U32 Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32() @@ -34079,14 +34083,53 @@ namespace VegaISA // --- description from .arch file --- // 32b: - // tmp = MEM[ADDR]; // MEM[ADDR] += DATA; - // RETURN_DATA = tmp. 
void Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); } // execute + + void + Inst_DS__DS_ADD_U32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initAtomicAccess(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_ADD_U32::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc // --- Inst_DS__DS_SUB_U32 class methods --- Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index 1c422481de..33be33ef31 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -31211,6 +31211,8 @@ namespace VegaISA } } // getOperandSize + void initiateAcc(GPUDynInstPtr gpuDynInst) override; + void completeAcc(GPUDynInstPtr gpuDynInst) override; void execute(GPUDynInstPtr) override; }; // Inst_DS__DS_ADD_U32 From 49ac00d060861b20c6715843e90e311bb5034323 Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Sat, 17 Dec 2022 09:02:50 +0000 Subject: [PATCH 085/492] stdlib: Fix errors in MESI_Three_Level_Cache_Hierarchy Change-Id: I60ae47f4336cb1b54bcca3fce3bdd13858daa92a 
Signed-off-by: Hoa Nguyen Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66771 Reviewed-by: Matt Sinclair Reviewed-by: Jason Lowe-Power Maintainer: Matt Sinclair Tested-by: kokoro --- .../caches/mesi_three_level/dma_controller.py | 22 ++++++++++++++----- .../ruby/caches/mesi_three_level/l1_cache.py | 11 +++++----- .../ruby/caches/mesi_three_level/l2_cache.py | 9 ++++++-- .../ruby/caches/mesi_three_level/l3_cache.py | 9 ++++---- 4 files changed, 32 insertions(+), 19 deletions(-) diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py index ab76d4cb5e..f731869f54 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py @@ -25,16 +25,26 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from ......utils.override import overrides -from ..abstract_dma_controller import AbstractDMAController -from m5.objects import MessageBuffer +from m5.objects import MessageBuffer, DMA_Controller -class DMAController(AbstractDMAController): - def __init__(self, network, cache_line_size): - super().__init__(network, cache_line_size) +class DMAController(DMA_Controller): + _version = 0 + + @classmethod + def _get_version(cls): + cls._version += 1 + return cls._version - 1 + + def __init__(self, dma_sequencer, ruby_system): + super().__init__( + version=self._get_version(), + dma_sequencer=dma_sequencer, + ruby_system=ruby_system, + ) + self.connectQueues(self.ruby_system.network) - @overrides(AbstractDMAController) def connectQueues(self, network): self.mandatoryQueue = MessageBuffer() self.responseFromDir = MessageBuffer(ordered=True) diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py index 2ce13d3b08..9f47e411f8 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py @@ -68,14 +68,14 @@ class L1Cache(L0Cache_Controller): self.Icache = RubyCache( size=l1i_size, assoc=l1i_assoc, - start_index_bit=self.getBlockSizeBits(), + start_index_bit=self.getBlockSizeBits(cache_line_size.value), is_icache=True, replacement_policy=LRURP(), ) self.Dcache = RubyCache( size=l1d_size, assoc=l1d_assoc, - start_index_bit=self.getBlockSizeBits(), + start_index_bit=self.getBlockSizeBits(cache_line_size.value), is_icache=False, replacement_policy=LRURP(), ) @@ -88,12 +88,11 @@ class L1Cache(L0Cache_Controller): self.response_latency = 2 self.version = self.versionCount() - self._cache_line_size = cache_line_size self.connectQueues(network) - def getBlockSizeBits(self): - bits = int(math.log(self._cache_line_size, 2)) - if 
2**bits != self._cache_line_size.value: + def getBlockSizeBits(self, cache_line_size): + bits = int(math.log(cache_line_size, 2)) + if 2**bits != cache_line_size: raise Exception("Cache line size is not a power of 2!") return bits diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py index e29f566191..d8c965924e 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py @@ -67,7 +67,7 @@ class L2Cache(L1Cache_Controller): self.cache = RubyCache( size=l2_size, assoc=l2_assoc, - start_index_bit=self.getBlockSizeBits(), + start_index_bit=self.getBlockSizeBits(cache_line_size.value), is_icache=False, ) # l2_select_num_bits is ruby backend terminology. @@ -86,9 +86,14 @@ class L2Cache(L1Cache_Controller): self.to_l2_latency = 1 self.version = self.versionCount() - self._cache_line_size = cache_line_size self.connectQueues(network) + def getBlockSizeBits(self, cache_line_size): + bits = int(math.log(cache_line_size, 2)) + if 2**bits != cache_line_size: + raise Exception("Cache line size is not a power of 2!") + return bits + def connectQueues(self, network): self.mandatoryQueue = MessageBuffer() self.optionalQueue = MessageBuffer() diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py index 6d46d1fdf0..0a93d9b0c8 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py @@ -54,7 +54,7 @@ class L3Cache(L2Cache_Controller): self.L2cache = RubyCache( size=l3_size, assoc=l3_assoc, - start_index_bit=self.getIndexBit(num_l3Caches), + 
start_index_bit=self.getIndexBit(num_l3Caches, cache_line_size), ) self.transitions_per_cycle = 4 @@ -64,12 +64,11 @@ class L3Cache(L2Cache_Controller): self.to_l1_latency = 1 self.version = self.versionCount() - self._cache_line_size = cache_line_size self.connectQueues(network) - def getIndexBit(self, num_l3caches): - l3_bits = int(math.log(num_l3caches, 2)) - bits = int(math.log(self._cache_line_size, 2)) + l3_bits + def getIndexBit(self, num_l3Caches, cache_line_size): + l3_bits = int(math.log(num_l3Caches, 2)) + bits = int(math.log(cache_line_size, 2)) + l3_bits return bits def connectQueues(self, network): From 535727703945f44b08ab251bdc243a54c63cf51f Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Tue, 3 Jan 2023 15:33:44 +0000 Subject: [PATCH 086/492] ext-testlib: Improve error reporting when test definition fails The error reason is now reported as an element in the XML testing result summary. Change-Id: I18b84422bb9580709cf1c5f2a14a5cbb0caf1876 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66892 Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce Tested-by: kokoro --- ext/testlib/result.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/ext/testlib/result.py b/ext/testlib/result.py index 5c60342c11..786febde2a 100644 --- a/ext/testlib/result.py +++ b/ext/testlib/result.py @@ -191,17 +191,23 @@ class XMLElement(object): def begin(self, file_): file_.write('<') file_.write(self.name) - for attr in self.attributes: - file_.write(' ') - attr.write(file_) + if hasattr(self, 'attributes'): + for attr in self.attributes: + file_.write(' ') + attr.write(file_) file_.write('>') self.body(file_) def body(self, file_): - for elem in self.elements: - file_.write('\n') - elem.write(file_) + if hasattr(self, 'elements'): + for elem in self.elements: + file_.write('\n') + elem.write(file_) + if hasattr(self, 'content'): + file_.write('\n') + file_.write( + xml.sax.saxutils.escape(self.content)) 
file_.write('\n') def end(self, file_): @@ -286,17 +292,22 @@ class JUnitTestCase(XMLElement): ] if str(test_result.result) == 'Failed': - self.elements.append(JUnitFailure('Test failed', 'ERROR')) + self.elements.append(JUnitFailure( + 'Test failed', + str(test_result.result.reason)) + ) class JUnitFailure(XMLElement): name = 'failure' - def __init__(self, message, fail_type): + def __init__(self, message, cause): self.attributes = [ XMLAttribute('message', message), - XMLAttribute('type', fail_type), ] - self.elements = [] + cause_element = XMLElement() + cause_element.name = 'cause' + cause_element.content = cause + self.elements = [cause_element] class LargeFileElement(XMLElement): From 313f557b932786a426f6f613c111005f507f1b24 Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Tue, 3 Jan 2023 15:37:38 +0000 Subject: [PATCH 087/492] ext-testlib: Support str-convertible args in gem5_verify_config gem5_verify_config did not support string-convertible args due to log_call() not trying to call str() on them. This patch maps str() on the command parameters. It is now possible to pass native integers or even string-like types like pathlib.Path as arguments without manually converting them to string. 
Change-Id: Ifa987f5f1a20f17c8710e1a36d99d424e4c9ce6c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66893 Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce Tested-by: kokoro --- ext/testlib/helper.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ext/testlib/helper.py b/ext/testlib/helper.py index ed6e325158..ea102f262b 100644 --- a/ext/testlib/helper.py +++ b/ext/testlib/helper.py @@ -149,7 +149,14 @@ def log_call(logger, command, time, *popenargs, **kwargs): if isinstance(command, str): cmdstr = command else: - cmdstr = ' '.join(command) + try: + command = list(map(str, command)) + cmdstr = " ".join(command) + except TypeError as e: + logger.trace( + "Argument must be an iterable of string-convertible types" + ) + raise e logger_callback = logger.trace logger.trace('Logging call to command: %s' % cmdstr) From 15cb9c7abef94d53135351422284c8651ce0133b Mon Sep 17 00:00:00 2001 From: Simon Park Date: Tue, 3 Jan 2023 01:07:03 -0800 Subject: [PATCH 088/492] base: socket: add UnixSocketAddr for representing socket paths Added UnixSocketAddr that wraps around sockaddr_un. Using this wrapper, users can create both file based sockets as well as abstract sockets. 
Change-Id: Ibf105b92a6a6ac7fc9136ed307f824c83e45c06c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66471 Maintainer: Gabe Black Reviewed-by: Gabe Black Tested-by: kokoro --- src/base/SConscript | 3 +- src/base/socket.cc | 66 ++++++++++++++++++++++++++++ src/base/socket.hh | 34 +++++++++++++++ src/base/socket.test.cc | 77 +++++++++++++++++++++++++++++++++ src/base/str.hh | 9 ++++ src/mem/shared_memory_server.cc | 56 +++++++++++------------- src/mem/shared_memory_server.hh | 3 +- 7 files changed, 216 insertions(+), 32 deletions(-) diff --git a/src/base/SConscript b/src/base/SConscript index e751d0b5ef..4a6b65fa72 100644 --- a/src/base/SConscript +++ b/src/base/SConscript @@ -68,7 +68,8 @@ Source('pollevent.cc') Source('random.cc') Source('remote_gdb.cc') Source('socket.cc') -GTest('socket.test', 'socket.test.cc', 'socket.cc') +SourceLib('z', tags='socket_test') +GTest('socket.test', 'socket.test.cc', 'socket.cc', 'output.cc', with_tag('socket_test')) Source('statistics.cc') Source('str.cc', add_tags=['gem5 trace', 'gem5 serialize']) GTest('str.test', 'str.test.cc', 'str.cc') diff --git a/src/base/socket.cc b/src/base/socket.cc index 5cf67fdd90..23f2b40d1a 100644 --- a/src/base/socket.cc +++ b/src/base/socket.cc @@ -35,22 +35,88 @@ #include #include #include +#include #include #include #include "base/logging.hh" +#include "base/output.hh" +#include "base/str.hh" #include "base/types.hh" #include "sim/byteswap.hh" namespace gem5 { +namespace +{ + +bool +isSocketNameAbstract(const std::string &path) +{ + if (path.empty()) { + return false; + } + // No null byte should be present in the path + return path.front() == '@'; +} + +std::string +resolve(const std::string &path) +{ + if (path.empty()) { + return path; + } + if (isSocketNameAbstract(path)) { + return '\0' + path.substr(1); + } + return simout.resolve(path); +} + +} // namespace bool ListenSocket::listeningDisabled = false; bool ListenSocket::anyListening = false; bool 
ListenSocket::bindToLoopback = false; +UnixSocketAddr +UnixSocketAddr::build(const std::string &path) +{ + sockaddr_un addr = {.sun_family = AF_UNIX, .sun_path = {}}; + + const bool is_abstract = isSocketNameAbstract(path); + size_t max_len = sizeof(addr.sun_path); + if (!is_abstract) { + // File based socket names need to be null terminated + max_len -= 1; + } + + std::string resolved_path = resolve(path); + std::string fmt_path = replace(resolved_path, '\0', '@'); + if (resolved_path.size() > max_len) { + resolved_path = resolved_path.substr(0, max_len); + const std::string untruncated_path = std::move(fmt_path); + fmt_path = replace(resolved_path, '\0', '@'); + warn("SocketPath: unix socket path truncated from '%s' to '%s'", + untruncated_path, fmt_path); + } + + // We can't use strncpy here, since abstract sockets start with \0 which + // will make strncpy think that the string is empty. + memcpy(addr.sun_path, resolved_path.c_str(), resolved_path.size()); + // We can't use sizeof(sockaddr_un) for abstract sockets, since all + // sizeof(sun_path) bytes are used in representing the path. + const size_t path_size = + is_abstract ? resolved_path.size() : sizeof(addr.sun_path); + const size_t addr_size = offsetof(sockaddr_un, sun_path) + path_size; + + return UnixSocketAddr{.addr = std::move(addr), + .addrSize = addr_size, + .isAbstract = is_abstract, + .formattedPath = std::move(fmt_path)}; +} + void ListenSocket::cleanup() { diff --git a/src/base/socket.hh b/src/base/socket.hh index 3375ccc80a..f3b2760518 100644 --- a/src/base/socket.hh +++ b/src/base/socket.hh @@ -31,10 +31,44 @@ #include #include +#include + +#include namespace gem5 { +/** + * @brief Wrapper around sockaddr_un, so that it can be used for both file + * based unix sockets as well as abstract unix sockets. + */ +struct UnixSocketAddr +{ + /** + * @brief Builds UnixSocketAddr from the given path. + * @pre: `path` either represents a file based unix socket, or an abstract + * unix socket. 
If `path` represents an abstract socket, it should + * start with the character '@', and it should not have any null + * bytes in the name. + * @param path: Pathname, where the socket should be instantiated. + * @return UnixSocketAddr + */ + static UnixSocketAddr build(const std::string &path); + + sockaddr_un addr; + // Size of `sockaddr_un addr`. This is equal to sizeof(sockaddr_un) if + // `addr` represents a normal file based unix socket. For abstract sockets + // however, the size could be different. Because all sizeof(sun_path) is + // used to represent the name of an abstract socket, addrSize for abstract + // sockets only count the number of characters actually used by sun_path, + // excluding any trailing null bytes. + size_t addrSize; + bool isAbstract; + // Formatted string for file based sockets look the same as addr.sun_path. + // For abstract sockets however, all null bytes are replaced with @ + std::string formattedPath; +}; + class ListenSocket { protected: diff --git a/src/base/socket.test.cc b/src/base/socket.test.cc index a267f8ce43..1ab1f21070 100644 --- a/src/base/socket.test.cc +++ b/src/base/socket.test.cc @@ -28,6 +28,10 @@ #include +#include +#include +#include + #include "base/gtest/logging.hh" #include "base/socket.hh" @@ -41,6 +45,79 @@ using namespace gem5; * socket.cc have not been fully tested due to interaction with system-calls. 
*/ +namespace { + +std::string +repeat(const std::string& str, size_t n) +{ + std::stringstream ss; + for (int i = 0; i < n; ++i) { + ss << str; + } + return ss.str(); +} + +} // namespace + +TEST(UnixSocketAddrTest, AbstractSocket) +{ + UnixSocketAddr sock_addr = UnixSocketAddr::build("@abstract"); + EXPECT_EQ(AF_UNIX, sock_addr.addr.sun_family); + // null byte will not show, so compare from the first byte + EXPECT_STREQ("abstract", sock_addr.addr.sun_path + 1); + EXPECT_TRUE(sock_addr.isAbstract); + EXPECT_STREQ("@abstract", sock_addr.formattedPath.c_str()); +} + +TEST(UnixSocketAddrTest, TruncatedAbstractSocket) +{ + // Test that address is truncated if longer than sizeof(sun_path) + constexpr size_t MaxSize = sizeof(std::declval().sun_path); + + // >sizeof(sun_path) bytes + std::string addr = "@" + repeat("123456789", 100); + ASSERT_GT(addr.size(), MaxSize); + std::string truncated_addr = addr.substr(0, MaxSize); + + UnixSocketAddr sock_addr = UnixSocketAddr::build(addr); + EXPECT_EQ(AF_UNIX, sock_addr.addr.sun_family); + // Use memcmp so that we can compare null bytes as well + std::string null_formatted = '\0' + truncated_addr.substr(1); + EXPECT_EQ(0, std::memcmp(null_formatted.c_str(), sock_addr.addr.sun_path, + MaxSize)); + EXPECT_TRUE(sock_addr.isAbstract); + EXPECT_EQ(truncated_addr, sock_addr.formattedPath); +} + +TEST(UnixSocketAddrTest, FileBasedSocket) +{ + std::string addr = "/home/parent/dir/x"; + UnixSocketAddr sock_addr = UnixSocketAddr::build(addr); + EXPECT_EQ(AF_UNIX, sock_addr.addr.sun_family); + EXPECT_STREQ(addr.c_str(), sock_addr.addr.sun_path); + EXPECT_FALSE(sock_addr.isAbstract); + EXPECT_EQ(addr, sock_addr.formattedPath); +} + +TEST(UnixSocketAddrTest, TruncatedFileBasedSocket) +{ + // sun_path should null terminate, so test that address is truncated if + // longer than sizeof(sun_path) - 1 bytes. 
+ constexpr size_t MaxSize = + sizeof(std::declval().sun_path) - 1; + + // >sizeof(sun_path) - 1 bytes + std::string addr = "/" + repeat("123456789", 100); + ASSERT_GT(addr.size(), MaxSize); + std::string truncated_addr = addr.substr(0, MaxSize); + + UnixSocketAddr sock_addr = UnixSocketAddr::build(addr); + EXPECT_EQ(AF_UNIX, sock_addr.addr.sun_family); + EXPECT_STREQ(truncated_addr.c_str(), sock_addr.addr.sun_path); + EXPECT_FALSE(sock_addr.isAbstract); + EXPECT_EQ(truncated_addr, sock_addr.formattedPath); +} + class MockListenSocket : public ListenSocket { public: diff --git a/src/base/str.hh b/src/base/str.hh index 00409ff3d7..855fb43b28 100644 --- a/src/base/str.hh +++ b/src/base/str.hh @@ -32,6 +32,7 @@ #ifndef __BASE_STR_HH__ #define __BASE_STR_HH__ +#include #include #include #include @@ -251,6 +252,14 @@ startswith(const std::string &s, const std::string &prefix) return (s.compare(0, prefix.size(), prefix) == 0); } +inline std::string +replace(const std::string &s, char from, char to) +{ + std::string replaced = s; + std::replace(replaced.begin(), replaced.end(), from, to); + return replaced; +} + } // namespace gem5 #endif //__BASE_STR_HH__ diff --git a/src/mem/shared_memory_server.cc b/src/mem/shared_memory_server.cc index bee663bd37..6344ee0388 100644 --- a/src/mem/shared_memory_server.cc +++ b/src/mem/shared_memory_server.cc @@ -34,7 +34,6 @@ #include #include #include -#include #include #include @@ -44,7 +43,6 @@ #include "base/logging.hh" #include "base/output.hh" #include "base/pollevent.hh" -#include "base/socket.hh" namespace gem5 { @@ -52,51 +50,49 @@ namespace memory { SharedMemoryServer::SharedMemoryServer(const SharedMemoryServerParams& params) - : SimObject(params), unixSocketPath(simout.resolve(params.server_path)), - system(params.system), serverFd(-1) + : SimObject(params), + sockAddr(UnixSocketAddr::build(params.server_path)), + system(params.system), + serverFd(-1) { fatal_if(system == nullptr, "Requires a system to share memory from!"); 
// Create a new unix socket. serverFd = ListenSocket::socketCloexec(AF_UNIX, SOCK_STREAM, 0); panic_if(serverFd < 0, "%s: cannot create unix socket: %s", name(), strerror(errno)); - // Bind to the specified path. - sockaddr_un serv_addr = {}; - serv_addr.sun_family = AF_UNIX; - strncpy(serv_addr.sun_path, unixSocketPath.c_str(), - sizeof(serv_addr.sun_path) - 1); - // If the target path is truncated, warn the user that the actual path is - // different and update the target path. - if (strlen(serv_addr.sun_path) != unixSocketPath.size()) { - warn("%s: unix socket path truncated, expect '%s' but get '%s'", - name(), unixSocketPath, serv_addr.sun_path); - unixSocketPath = serv_addr.sun_path; + + const auto& [serv_addr, addr_size, is_abstract, formatted_path] = sockAddr; + + if (!is_abstract) { + // Ensure the unix socket path to use is not occupied. Also, if there's + // actually anything to be removed, warn the user something might be + // off. + bool old_sock_removed = unlink(serv_addr.sun_path) == 0; + warn_if(old_sock_removed, + "%s: server path %s was occupied and will be replaced. Please " + "make sure there is no other server using the same path.", + name(), serv_addr.sun_path); } - // Ensure the unix socket path to use is not occupied. Also, if there's - // actually anything to be removed, warn the user something might be off. - bool old_sock_removed = unlink(unixSocketPath.c_str()) == 0; - warn_if(old_sock_removed, - "%s: the server path %s was occupied and will be replaced. Please " - "make sure there is no other server using the same path.", - name(), unixSocketPath); - int bind_retv = bind(serverFd, reinterpret_cast(&serv_addr), - sizeof(serv_addr)); - fatal_if(bind_retv != 0, "%s: cannot bind unix socket: %s", name(), - strerror(errno)); + int bind_retv = bind( + serverFd, reinterpret_cast(&serv_addr), addr_size); + fatal_if(bind_retv != 0, "%s: cannot bind unix socket '%s': %s", name(), + formatted_path, strerror(errno)); // Start listening. 
int listen_retv = listen(serverFd, 1); fatal_if(listen_retv != 0, "%s: listen failed: %s", name(), strerror(errno)); listenSocketEvent.reset(new ListenSocketEvent(serverFd, this)); pollQueue.schedule(listenSocketEvent.get()); - inform("%s: listening at %s", name(), unixSocketPath); + inform("%s: listening at %s", name(), formatted_path); } SharedMemoryServer::~SharedMemoryServer() { - int unlink_retv = unlink(unixSocketPath.c_str()); - warn_if(unlink_retv != 0, "%s: cannot unlink unix socket: %s", name(), - strerror(errno)); + if (!sockAddr.isAbstract) { + int unlink_retv = unlink(sockAddr.addr.sun_path); + warn_if(unlink_retv != 0, "%s: cannot unlink unix socket: %s", name(), + strerror(errno)); + } int close_retv = close(serverFd); warn_if(close_retv != 0, "%s: cannot close unix socket: %s", name(), strerror(errno)); diff --git a/src/mem/shared_memory_server.hh b/src/mem/shared_memory_server.hh index 8f573fef3b..d9fbeb3f20 100644 --- a/src/mem/shared_memory_server.hh +++ b/src/mem/shared_memory_server.hh @@ -33,6 +33,7 @@ #include #include "base/pollevent.hh" +#include "base/socket.hh" #include "params/SharedMemoryServer.hh" #include "sim/sim_object.hh" #include "sim/system.hh" @@ -82,7 +83,7 @@ class SharedMemoryServer : public SimObject void process(int revent) override; }; - std::string unixSocketPath; + UnixSocketAddr sockAddr; System* system; int serverFd; From a533cb246c9e0fa373a65df3e51f9dc0f570f7ac Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Mon, 19 Dec 2022 16:08:29 +0000 Subject: [PATCH 089/492] scons: Include libraries when building gem5 as a shared object While we include shared libraries in the Executable class, we are not doing it when linking the SharedLib. This means the resulting Shared library won't have the library as a dependency (it won't appear in ldd) and the symbols will remain undefined. Any executable will fail to link with the shared library as the executable will contain undefined references. 
This bug was exposed when I tried to link util/tlm sources with libgem5.so. As I have libpng/libpng-dev installed on my machine, the shared library included libpng headers, but didn't link to the library as scons didn't append "-lpng" to the linking CL. Those png functions thus remained undefined symbols. Change-Id: Id9c4a65607a7177f71659f1ac400a67edf7080fd Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66855 Tested-by: kokoro Reviewed-by: Gabe Black Maintainer: Daniel Carvalho Reviewed-by: Daniel Carvalho Reviewed-by: Bobby Bruce --- src/SConscript | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/SConscript b/src/SConscript index 4e7139c064..51b4bd9b3b 100644 --- a/src/SConscript +++ b/src/SConscript @@ -376,6 +376,12 @@ class SharedLib(TopLevelBase): def declare(self, env): objs = self.srcs_to_objs(env, self.sources(env)) + libs = self.libs(env) + # Higher priority libraries should be earlier in the list. + libs.sort(key=lambda l: l.priority, reverse=True) + if libs: + env.Append(LIBS=list(lib.source for lib in libs)) + date_obj = env.SharedObject(date_source) env.Depends(date_obj, objs) From 7238df7859936a826159d93e36dc0bf2611ac4ef Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Tue, 20 Dec 2022 08:43:59 +0000 Subject: [PATCH 090/492] util: Update run_gem5_fs.sh script with AArch64 platform The example script is using VExpress_EMM, which is a deprecated platform and it is referring to an AArch32 kernel. 
With this patch we use the VExpress_GEM5_Foundation platform instead and point to an AArch64 kernel. Change-Id: I961d5d5de71bc284c7492ee7b04088148909ca1b Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66856 Maintainer: Daniel Carvalho Reviewed-by: Matthias Jung Reviewed-by: Daniel Carvalho Tested-by: kokoro --- util/tlm/README | 7 +++---- util/tlm/run_gem5_fs.sh | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/util/tlm/README b/util/tlm/README index 8098afa0e5..3ae43c5909 100644 --- a/util/tlm/README +++ b/util/tlm/README @@ -145,10 +145,9 @@ C++-configured gem5 using normal gem5 > ../../build/ARM/gem5.opt ../../configs/example/fs.py \ --tlm-memory=transactor --cpu-type=TimingSimpleCPU --num-cpu=1 \ --mem-type=SimpleMemory --mem-size=512MB --mem-channels=1 --caches \ - --l2cache --machine-type=VExpress_EMM \ - --dtb-filename=vexpress.aarch32.ll_20131205.0-gem5.1cpu.dtb \ - --kernel=vmlinux.aarch32.ll_20131205.0-gem5 \ - --disk-image=linux-aarch32-ael.img + --l2cache --machine-type=VExpress_GEM5_Foundation \ + --kernel=vmlinux.arm64 \ + --disk-image=ubuntu-18.04-arm64-docker.img The message "fatal: Can't find port handler type 'tlm_slave'" is okay. 
The configuration will be stored in the m5out/ directory diff --git a/util/tlm/run_gem5_fs.sh b/util/tlm/run_gem5_fs.sh index 9065cbf9f5..d8ab847031 100755 --- a/util/tlm/run_gem5_fs.sh +++ b/util/tlm/run_gem5_fs.sh @@ -42,9 +42,9 @@ echo -e "\n${BGre}Create gem5 Configuration${RCol}\n" --mem-size=512MB \ --mem-channels=1 \ --caches --l2cache \ ---machine-type=VExpress_EMM \ ---dtb-filename=vexpress.aarch32.ll_20131205.0-gem5.1cpu.dtb \ ---kernel=vmlinux.aarch32.ll_20131205.0-gem5 +--machine-type=VExpress_GEM5_Foundation \ +--kernel=vmlinux.arm64 \ +--disk-image=ubuntu-18.04-arm64-docker.img echo -e "\n${BGre}Run gem5 ${RCol}\n" From 160815f482735a5834be37970f9a6e26b4755238 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Mon, 26 Dec 2022 09:18:49 -0800 Subject: [PATCH 091/492] base: Specialize bitwise atomics so FP types can be used The current atomic memory operations are templated so any type can be used. However floating point types can not perform bitwise operations. The GPU model contains some instructions which do atomics on floating point types, so they need to be supported. To allow this, template specialization is added to atomic AND, OR, and XOR which does nothing if the type is floating point and operates as normal for integral types. 
Change-Id: I60f935756355462e99c59a9da032c5bf5afa246c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67073 Reviewed-by: Matt Sinclair Reviewed-by: Daniel Carvalho Tested-by: kokoro Maintainer: Matt Sinclair --- src/base/amo.hh | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/src/base/amo.hh b/src/base/amo.hh index 81bf069c50..c990d158b4 100644 --- a/src/base/amo.hh +++ b/src/base/amo.hh @@ -129,30 +129,57 @@ class AtomicGenericPair3Op : public TypedAtomicOpFunctor template class AtomicOpAnd : public TypedAtomicOpFunctor { + // Bitwise operations are only legal on integral types + template + typename std::enable_if::value, void>::type + executeImpl(B *b) { *b &= a; } + + template + typename std::enable_if::value, void>::type + executeImpl(B *b) { } + public: T a; AtomicOpAnd(T _a) : a(_a) { } - void execute(T *b) { *b &= a; } + void execute(T *b) { executeImpl(b); } AtomicOpFunctor* clone () { return new AtomicOpAnd(a); } }; template class AtomicOpOr : public TypedAtomicOpFunctor { + // Bitwise operations are only legal on integral types + template + typename std::enable_if::value, void>::type + executeImpl(B *b) { *b |= a; } + + template + typename std::enable_if::value, void>::type + executeImpl(B *b) { } + public: T a; AtomicOpOr(T _a) : a(_a) { } - void execute(T *b) { *b |= a; } + void execute(T *b) { executeImpl(b); } AtomicOpFunctor* clone () { return new AtomicOpOr(a); } }; template class AtomicOpXor : public TypedAtomicOpFunctor { + // Bitwise operations are only legal on integral types + template + typename std::enable_if::value, void>::type + executeImpl(B *b) { *b ^= a; } + + template + typename std::enable_if::value, void>::type + executeImpl(B *b) { } + public: T a; AtomicOpXor(T _a) : a(_a) {} - void execute(T *b) { *b ^= a; } + void execute(T *b) { executeImpl(b); } AtomicOpFunctor* clone () { return new AtomicOpXor(a); } }; From 892e3057f7b10f2c869bae40e908a018f781611c Mon Sep 17 
00:00:00 2001 From: Matthew Poremba Date: Mon, 26 Dec 2022 09:13:06 -0800 Subject: [PATCH 092/492] arch-vega: Implement ds_add_f32 atomic This instruction does an atomic add of a 32-bit float with a VGPR and value in LDS atomically without return. Change-Id: Id4f23a1ab587a23edfd1d88ede1cbcc5bdedc0cb Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67074 Maintainer: Matt Sinclair Reviewed-by: Matt Sinclair Tested-by: kokoro --- src/arch/amdgpu/vega/insts/instructions.cc | 49 ++++++++++++++++++++-- src/arch/amdgpu/vega/insts/instructions.hh | 2 + 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index afdfde3855..3d9808ac7c 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -34755,6 +34755,10 @@ namespace VegaISA : Inst_DS(iFmt, "ds_add_f32") { setFlag(F32); + setFlag(MemoryRef); + setFlag(GroupSegment); + setFlag(AtomicAdd); + setFlag(AtomicNoReturn); } // Inst_DS__DS_ADD_F32 Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32() @@ -34763,15 +34767,54 @@ namespace VegaISA // --- description from .arch file --- // 32b: - // tmp = MEM[ADDR]; // MEM[ADDR] += DATA; - // RETURN_DATA = tmp. // Floating point add that handles NaN/INF/denormal values. 
void Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandF32 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); } // execute + + void + Inst_DS__DS_ADD_F32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initAtomicAccess(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_ADD_F32::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc // --- Inst_DS__DS_WRITE_B8 class methods --- Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index 33be33ef31..05a0002b25 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -31895,6 +31895,8 @@ namespace VegaISA } } // getOperandSize + void initiateAcc(GPUDynInstPtr gpuDynInst) override; + void completeAcc(GPUDynInstPtr gpuDynInst) override; void execute(GPUDynInstPtr) override; }; // Inst_DS__DS_ADD_F32 From b83457df0bee2cec66bf377d17b95c867d8ef025 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Mon, 26 Dec 2022 09:54:20 -0800 Subject: [PATCH 093/492] arch-vega: Implement ds_add_u64 This instruction does an atomic add of an unsigned 64-bit data with a 
VGPR and value in LDS atomically without return. Change-Id: I6a7d6713b256607c4e69ddbdef5c83172493c077 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67075 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- src/arch/amdgpu/vega/insts/instructions.cc | 49 ++++++++++++++++++++-- src/arch/amdgpu/vega/insts/instructions.hh | 2 + 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index 3d9808ac7c..a54f426837 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -36088,6 +36088,10 @@ namespace VegaISA Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt) : Inst_DS(iFmt, "ds_add_u64") { + setFlag(MemoryRef); + setFlag(GroupSegment); + setFlag(AtomicAdd); + setFlag(AtomicNoReturn); } // Inst_DS__DS_ADD_U64 Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64() @@ -36096,14 +36100,53 @@ namespace VegaISA // --- description from .arch file --- // 64b: - // tmp = MEM[ADDR]; // MEM[ADDR] += DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
void Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU64 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); } // execute + + void + Inst_DS__DS_ADD_U64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initAtomicAccess(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_ADD_U64::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc // --- Inst_DS__DS_SUB_U64 class methods --- Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index 05a0002b25..f8fc98b647 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -33079,6 +33079,8 @@ namespace VegaISA } } // getOperandSize + void initiateAcc(GPUDynInstPtr gpuDynInst) override; + void completeAcc(GPUDynInstPtr gpuDynInst) override; void execute(GPUDynInstPtr) override; }; // Inst_DS__DS_ADD_U64 From 3bfa220e4ecd098de36d81a171593b14d7551583 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Mon, 26 Dec 2022 13:27:06 -0800 Subject: [PATCH 094/492] arch-vega: Implement ds_read_i8 Read one byte with sign extended from LDS. 
Change-Id: I9cb9b4033c6f834241cba944bc7e6a7ebc5401be Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67076 Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/arch/amdgpu/vega/insts/instructions.cc | 44 +++++++++++++++++++++- src/arch/amdgpu/vega/insts/instructions.hh | 2 + 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index a54f426837..c803656996 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -35636,8 +35636,50 @@ namespace VegaISA void Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); } // execute + + void + Inst_DS__DS_READ_I8::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_I8::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (VecElemU32)sext<8>((reinterpret_cast( + gpuDynInst->d_data))[lane]); + } + } + + vdst.write(); + } // completeAcc // --- Inst_DS__DS_READ_U8 class methods --- Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh 
b/src/arch/amdgpu/vega/insts/instructions.hh index f8fc98b647..b2cf2b9705 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -32848,6 +32848,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_DS__DS_READ_I8 class Inst_DS__DS_READ_U8 : public Inst_DS From 450bc254bd31260f24e2c5068e2c6295d603b87a Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Mon, 26 Dec 2022 15:08:23 -0800 Subject: [PATCH 095/492] arch-vega: Read one dword for SGPR base global insts Global instructions in Vega can either use a VGPR base address plus instruction offset or SGPR base address plus VGPR offset plus instruction offset. Currently the VGPR address/offset is always read as two dwords. This causes problems if the VGPR number is the last VGPR allocated to a wavefront since the second dword would be beyond the allocation and trip an assert. This changeset sets the operand size of the VGPR operand to one dword when SGPR base is used and two dwords otherwise so initDynOperandInfo does not assert. It also moves the read of the VGPR into the calcAddr method so that the correct ConstVecOperandU## is used to prevent another assertion failure when reading from the register file. These two changes are made to all flat instructions, as global instructions are a subsegment of flat instructions. 
Change-Id: I79030771aa6deec05ffa5853ca2d8b68943ee0a0 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67077 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- src/arch/amdgpu/vega/insts/instructions.cc | 80 +++++----------------- src/arch/amdgpu/vega/insts/instructions.hh | 80 +++++++++++----------- src/arch/amdgpu/vega/insts/op_encodings.hh | 20 ++++-- 3 files changed, 73 insertions(+), 107 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index c803656996..4b27afa65d 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -43831,11 +43831,7 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -43919,11 +43915,7 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44008,11 +44000,7 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44067,11 +44055,7 @@ namespace VegaISA 
gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44126,11 +44110,7 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44194,11 +44174,7 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44266,13 +44242,11 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU8 data(gpuDynInst, extData.DATA); - addr.read(); data.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -44325,13 +44299,11 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU16 data(gpuDynInst, extData.DATA); - addr.read(); 
data.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -44384,13 +44356,11 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU32 data(gpuDynInst, extData.DATA); - addr.read(); data.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -44444,13 +44414,11 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU64 data(gpuDynInst, extData.DATA); - addr.read(); data.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -44504,17 +44472,15 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU32 data0(gpuDynInst, extData.DATA); ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1); ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2); - addr.read(); data0.read(); data1.read(); data2.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -44572,19 +44538,17 @@ namespace VegaISA 
gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU32 data0(gpuDynInst, extData.DATA); ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1); ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2); ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3); - addr.read(); data0.read(); data1.read(); data2.read(); data3.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -44650,13 +44614,11 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU32 data(gpuDynInst, extData.DATA); - addr.read(); data.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -44732,15 +44694,13 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU32 data(gpuDynInst, extData.DATA); ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1); - addr.read(); data.read(); cmp.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -44814,13 +44774,11 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, 
extData.ADDR); ConstVecOperandU32 data(gpuDynInst, extData.DATA); - addr.read(); data.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -45204,15 +45162,13 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU64 data(gpuDynInst, extData.DATA); ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2); - addr.read(); data.read(); cmp.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -45287,13 +45243,11 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU64 data(gpuDynInst, extData.DATA); - addr.read(); data.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index b2cf2b9705..9f017f9b90 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -41892,7 +41892,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_dst or saddr return isFlat() ? 1 : 8; case 2: //vgpr_dst @@ -41929,7 +41929,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 
4 : 8; case 1: //vgpr_dst or saddr return isFlat() ? 1 : 8; case 2: //vgpr_dst @@ -41966,7 +41966,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_dst or saddr return isFlat() ? 2 : 8; case 2: //vgpr_dst @@ -42003,7 +42003,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_dst or saddr return isFlat() ? 2 : 8; case 2: //vgpr_dst @@ -42040,7 +42040,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_dst or saddr return isFlat() ? 4 : 8; case 2: //vgpr_dst @@ -42077,7 +42077,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_dst or saddr return isFlat() ? 8 : 8; case 2: //vgpr_dst @@ -42114,7 +42114,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_dst or saddr return isFlat() ? 12 : 8; case 2: //vgpr_dst @@ -42151,7 +42151,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_dst or saddr return isFlat() ? 16 : 8; case 2: //vgpr_dst @@ -42188,7 +42188,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 1; case 2: //saddr @@ -42225,7 +42225,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 2; case 2: //saddr @@ -42262,7 +42262,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 4; case 2: //saddr @@ -42299,7 +42299,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 
4 : 8; case 1: //vgpr_src return 8; case 2: //saddr @@ -42336,7 +42336,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 12; case 2: //saddr @@ -42373,7 +42373,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 16; case 2: //saddr @@ -42410,7 +42410,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 4; case 2: //vgpr_dst or saddr @@ -42449,7 +42449,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr @@ -42488,7 +42488,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 4; case 2: //vgpr_dst or saddr @@ -42527,7 +42527,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 4; case 2: //vgpr_dst or saddr @@ -42564,7 +42564,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 4; case 2: //vgpr_dst or saddr @@ -42601,7 +42601,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 4; case 2: //vgpr_dst or saddr @@ -42638,7 +42638,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 4; case 2: //vgpr_dst or saddr @@ -42675,7 +42675,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 4; case 2: //vgpr_dst or saddr @@ -42712,7 +42712,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 
4 : 8; case 1: //vgpr_src return 4; case 2: //vgpr_dst or saddr @@ -42749,7 +42749,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 4; case 2: //vgpr_dst or saddr @@ -42786,7 +42786,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 4; case 2: //vgpr_dst or saddr @@ -42823,7 +42823,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 4; case 2: //vgpr_dst or saddr @@ -42860,7 +42860,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 4; case 2: //vgpr_dst or saddr @@ -42897,7 +42897,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr @@ -42934,7 +42934,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 16; case 2: //vgpr_dst or saddr @@ -42973,7 +42973,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr @@ -43012,7 +43012,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr @@ -43049,7 +43049,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr @@ -43086,7 +43086,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 
4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr @@ -43123,7 +43123,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr @@ -43160,7 +43160,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr @@ -43197,7 +43197,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr @@ -43234,7 +43234,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr @@ -43271,7 +43271,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr @@ -43308,7 +43308,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr @@ -43345,7 +43345,7 @@ namespace VegaISA { switch (opIdx) { case 0: //vgpr_addr - return 8; + return vgprIsOffset() ? 4 : 8; case 1: //vgpr_src return 8; case 2: //vgpr_dst or saddr diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh index 508d706cd3..34f6040495 100644 --- a/src/arch/amdgpu/vega/insts/op_encodings.hh +++ b/src/arch/amdgpu/vega/insts/op_encodings.hh @@ -925,7 +925,7 @@ namespace VegaISA } void - calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr, + calcAddr(GPUDynInstPtr gpuDynInst, ScalarRegU32 vaddr, ScalarRegU32 saddr, ScalarRegI32 offset) { // Offset is a 13-bit field w/the following meanings: @@ -940,14 +940,20 @@ namespace VegaISA // be a 64-bit address. 
Otherwise, saddr is the reg index for a // scalar reg used as the base address for a 32-bit address. if ((saddr == 0x7f && isFlatGlobal()) || isFlat()) { - calcAddrVgpr(gpuDynInst, vaddr, offset); + ConstVecOperandU64 vbase(gpuDynInst, vaddr); + vbase.read(); + + calcAddrVgpr(gpuDynInst, vbase, offset); } else { // Assume we are operating in 64-bit mode and read a pair of // SGPRs for the address base. ConstScalarOperandU64 sbase(gpuDynInst, saddr); sbase.read(); - calcAddrSgpr(gpuDynInst, vaddr, sbase, offset); + ConstVecOperandU32 voffset(gpuDynInst, vaddr); + voffset.read(); + + calcAddrSgpr(gpuDynInst, voffset, sbase, offset); } if (isFlat()) { @@ -974,6 +980,12 @@ namespace VegaISA } } + bool + vgprIsOffset() + { + return (extData.SADDR != 0x7f); + } + // first instruction DWORD InFmt_FLAT instData; // second instruction DWORD @@ -987,7 +999,7 @@ namespace VegaISA void generateGlobalDisassembly(); void - calcAddrSgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr, + calcAddrSgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &vaddr, ConstScalarOperandU64 &saddr, ScalarRegI32 offset) { // Use SGPR pair as a base address and add VGPR-offset and From 03083ba5e3bf8a9ce416003e9f8809c54599d831 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Mon, 26 Dec 2022 16:46:40 -0800 Subject: [PATCH 096/492] arch-vega: Implement ds_write2st64_b64 Write two qwords at offsets multiplied by 8 * 64 bytes. 
Change-Id: I0d0e05f3e848c2fd02d32095e32b7f023bd8803b Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67078 Reviewed-by: Matt Sinclair Tested-by: kokoro Maintainer: Matt Sinclair --- src/arch/amdgpu/vega/insts/instructions.cc | 46 +++++++++++++++++++++- src/arch/amdgpu/vega/insts/instructions.hh | 2 + 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index 4b27afa65d..6cf01fb8f9 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -36595,8 +36595,52 @@ namespace VegaISA void Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU64 data0(gpuDynInst, extData.DATA0); + ConstVecOperandU64 data1(gpuDynInst, extData.DATA1); + + addr.read(); + data0.read(); + data1.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 2] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); } // execute + + void + Inst_DS__DS_WRITE2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0 * 8 * 64; + Addr offset1 = instData.OFFSET1 * 8 * 64; + + initDualMemWrite(gpuDynInst, offset0, offset1); + } + + void + Inst_DS__DS_WRITE2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // --- Inst_DS__DS_CMPST_B64 class methods --- 
Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index 9f017f9b90..289673232b 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -33572,6 +33572,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_DS__DS_WRITE2ST64_B64 class Inst_DS__DS_CMPST_B64 : public Inst_DS From ddf43726ef95fb9b64f89109413a62aa070fada8 Mon Sep 17 00:00:00 2001 From: Vishnu Ramadas Date: Wed, 4 Jan 2023 21:07:54 -0600 Subject: [PATCH 097/492] gpu-compute, mem-ruby: Update GPU cache bypassing to use TBE An earlier commit added support for GLC and SLC AMDGPU instruction modifiers. These modifiers enable cache bypassing when set. The GLC/SLC flag information was being threaded through all the way to memory and back so that appropriate actions could be taken upon receiving a request and corresponding response. This commit removes the threading and adds the bypass flag information to TBE. Requests populate this entry and responses access it to determine the correct set of actions to execute. 
Change-Id: I20ffa6682d109270adb921de078cfd47fb4e137c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67191 Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair Reviewed-by: Jason Lowe-Power --- src/mem/ruby/protocol/GPU_VIPER-TCC.sm | 59 +++++++++------------ src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm | 30 ----------- src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm | 2 - 3 files changed, 25 insertions(+), 66 deletions(-) diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm index ae142471fa..ca4c543722 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm @@ -283,7 +283,13 @@ machine(MachineType:TCC, "TCC Cache") peek(responseFromNB_in, ResponseMsg, block_on="addr") { TBE tbe := TBEs.lookup(in_msg.addr); Entry cache_entry := getCacheEntry(in_msg.addr); - if (in_msg.isSLCSet) { + bool is_slc_set := false; + + if (!is_invalid(tbe)) { + is_slc_set := tbe.isSLCSet; + } + + if (is_slc_set) { // If the SLC bit is set, the response needs to bypass the cache // and should not be allocated an entry. trigger(Event:Bypass, in_msg.addr, cache_entry, tbe); @@ -343,6 +349,10 @@ machine(MachineType:TCC, "TCC Cache") trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe); } } else if (in_msg.Type == CoherenceRequestType:Atomic) { + // Currently the Atomic requests do not have GLC/SLC bit handling + // support. 
The assert ensures that the requests do not have + // these set, and therefore do not expect to bypass the cache + assert(!in_msg.isSLCSet); trigger(Event:Atomic, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceRequestType:RdBlk) { if (in_msg.isSLCSet) { @@ -399,8 +409,8 @@ machine(MachineType:TCC, "TCC Cache") out_msg.State := CoherenceState:Shared; DPRINTF(RubySlicc, "%s\n", out_msg); peek(responseFromNB_in, ResponseMsg) { - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; + out_msg.isGLCSet := tbe.isGLCSet; + out_msg.isSLCSet := tbe.isSLCSet; } } enqueue(unblockToNB_out, UnblockMsg, 1) { @@ -408,8 +418,8 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); out_msg.MessageSize := MessageSizeType:Unblock_Control; peek(responseFromNB_in, ResponseMsg) { - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; + out_msg.isGLCSet := tbe.isGLCSet; + out_msg.isSLCSet := tbe.isSLCSet; } DPRINTF(RubySlicc, "%s\n", out_msg); } @@ -426,8 +436,8 @@ machine(MachineType:TCC, "TCC Cache") out_msg.MessageSize := MessageSizeType:Response_Data; out_msg.Dirty := false; out_msg.State := CoherenceState:Shared; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; + out_msg.isGLCSet := tbe.isGLCSet; + out_msg.isSLCSet := tbe.isSLCSet; DPRINTF(RubySlicc, "%s\n", out_msg); } enqueue(unblockToNB_out, UnblockMsg, 1) { @@ -449,8 +459,8 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); out_msg.Shared := false; // unneeded for this request out_msg.MessageSize := in_msg.MessageSize; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; + out_msg.isGLCSet := tbe.isGLCSet; + out_msg.isSLCSet := tbe.isSLCSet; DPRINTF(RubySlicc, "%s\n", out_msg); } } @@ -467,9 +477,6 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Sender := machineID; 
out_msg.MessageSize := MessageSizeType:Writeback_Control; out_msg.instSeqNum := in_msg.instSeqNum; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; - } } } @@ -484,9 +491,6 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Control; out_msg.instSeqNum := in_msg.instSeqNum; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; - } } } @@ -500,9 +504,8 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Sender := machineID; out_msg.MessageSize := in_msg.MessageSize; out_msg.DataBlk := in_msg.DataBlk; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; - + out_msg.isGLCSet := tbe.isGLCSet; + out_msg.isSLCSet := tbe.isSLCSet; } } } @@ -535,9 +538,9 @@ machine(MachineType:TCC, "TCC Cache") peek(coreRequestNetwork_in, CPURequestMsg) { if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){ tbe.Destination.add(in_msg.Requestor); - tbe.isGLCSet := in_msg.isGLCSet; - tbe.isSLCSet := in_msg.isSLCSet; } + tbe.isGLCSet := in_msg.isGLCSet; + tbe.isSLCSet := in_msg.isSLCSet; } } } @@ -576,8 +579,6 @@ machine(MachineType:TCC, "TCC Cache") out_msg.DataBlk := in_msg.DataBlk; out_msg.writeMask.orMask(in_msg.writeMask); out_msg.instSeqNum := in_msg.instSeqNum; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; } } } @@ -593,10 +594,6 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Dirty := true; out_msg.DataBlk := cache_entry.DataBlk; out_msg.writeMask.orMask(cache_entry.writeMask); - peek(coreRequestNetwork_in, CPURequestMsg) { - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; - } } } @@ -611,8 +608,6 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Type := CoherenceRequestType:Atomic; out_msg.Dirty := true; out_msg.writeMask.orMask(in_msg.writeMask); - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; } } } @@ -628,10 
+623,6 @@ machine(MachineType:TCC, "TCC Cache") out_msg.Ntsl := true; out_msg.State := CoherenceState:NA; out_msg.MessageSize := MessageSizeType:Response_Control; - peek(probeNetwork_in, NBProbeRequestMsg) { - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; - } } } action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") { @@ -676,8 +667,8 @@ machine(MachineType:TCC, "TCC Cache") out_msg.addr := address; out_msg.Type := TriggerType:AtomicDone; peek(responseFromNB_in, ResponseMsg) { - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; + out_msg.isGLCSet := tbe.isGLCSet; + out_msg.isSLCSet := tbe.isSLCSet; } } } diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm index 57edef8f2b..3b38e3b1ff 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm @@ -161,8 +161,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") uint64_t probe_id, desc="probe id for lifetime profiling"; WriteMask writeMask, desc="outstanding write through mask"; int Len, desc="Length of memory request for DMA"; - bool isGLCSet, desc="Bypass L1 Cache"; - bool isSLCSet, desc="Bypass L1 and L2 Cache"; } structure(TBETable, external="yes") { @@ -485,8 +483,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; out_msg.OriginalResponder := tbe.LastSender; out_msg.L3Hit := tbe.L3Hit; - out_msg.isGLCSet := tbe.isGLCSet; - out_msg.isSLCSet := tbe.isSLCSet; DPRINTF(RubySlicc, "%s\n", out_msg); } } @@ -516,8 +512,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.ForwardRequestTime := tbe.ForwardRequestTime; out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; out_msg.OriginalResponder := tbe.LastSender; - out_msg.isGLCSet := tbe.isGLCSet; - out_msg.isSLCSet := tbe.isSLCSet; if(tbe.atomicData){ out_msg.WTRequestor := tbe.WTRequestor; } @@ -546,8 +540,6 @@ 
machine(MachineType:Directory, "AMD Baseline protocol") out_msg.InitialRequestTime := tbe.InitialRequestTime; out_msg.ForwardRequestTime := curCycle(); out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; - out_msg.isGLCSet := tbe.isGLCSet; - out_msg.isSLCSet := tbe.isSLCSet; DPRINTF(RubySlicc, "%s\n", out_msg); } } @@ -565,8 +557,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.ForwardRequestTime := curCycle(); out_msg.ProbeRequestStartTime := curCycle(); out_msg.instSeqNum := in_msg.instSeqNum; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; } } } @@ -579,8 +569,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Data; out_msg.DataBlk := in_msg.DataBlk; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; } } } @@ -636,8 +624,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.Type := MemoryRequestType:MEMORY_READ; out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Request_Control; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; } } } @@ -753,8 +739,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.MessageSize := MessageSizeType:Control; out_msg.Destination := probe_dests; tbe.NumPendingAcks := out_msg.Destination.count(); - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; DPRINTF(RubySlicc, "%s\n", out_msg); APPEND_TRANSITION_COMMENT(" dc: Acks remaining: "); APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); @@ -858,8 +842,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.ReturnData := true; out_msg.MessageSize := MessageSizeType:Control; out_msg.Destination := probe_dests; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; tbe.NumPendingAcks := out_msg.Destination.count(); DPRINTF(RubySlicc, "%s\n", (out_msg)); APPEND_TRANSITION_COMMENT(" sc: 
Acks remaining: "); @@ -915,8 +897,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.ReturnData := false; out_msg.MessageSize := MessageSizeType:Control; out_msg.Destination := probe_dests; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; tbe.NumPendingAcks := out_msg.Destination.count(); APPEND_TRANSITION_COMMENT(" ic: Acks remaining: "); APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); @@ -943,8 +923,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Data; out_msg.DataBlk := in_msg.DataBlk; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; } if (tbe.Dirty == false) { // have to update the TBE, too, because of how this @@ -1007,8 +985,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") tbe.NumPendingAcks := 0; tbe.Cached := in_msg.ForceShared; tbe.InitialRequestTime := in_msg.InitialRequestTime; - tbe.isGLCSet := in_msg.isGLCSet; - tbe.isSLCSet := in_msg.isSLCSet; } } @@ -1028,8 +1004,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Data; out_msg.DataBlk := tbe.DataBlk; - out_msg.isGLCSet := tbe.isGLCSet; - out_msg.isSLCSet := tbe.isSLCSet; DPRINTF(ProtocolTrace, "%s\n", out_msg); } } @@ -1130,8 +1104,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Data; out_msg.DataBlk := victim_entry.DataBlk; - out_msg.isGLCSet := in_msg.isGLCSet; - out_msg.isSLCSet := in_msg.isSLCSet; } L3CacheMemory.deallocate(victim); } @@ -1164,8 +1136,6 @@ machine(MachineType:Directory, "AMD Baseline protocol") out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Data; out_msg.DataBlk := victim_entry.DataBlk; - out_msg.isGLCSet := tbe.isGLCSet; - out_msg.isSLCSet := tbe.isSLCSet; } L3CacheMemory.deallocate(victim); } 
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm index 6ff19e953b..bb3a013325 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm @@ -168,8 +168,6 @@ structure(NBProbeRequestMsg, desc="...", interface="Message") { MachineID Requestor, desc="Requestor id for 3-hop requests"; bool NoAckNeeded, default="false", desc="For short circuting acks"; int ProgramCounter, desc="PC that accesses to this block"; - bool isGLCSet, desc="Bypass L1 Cache"; - bool isSLCSet, desc="Bypass L1 and L2 Caches"; bool functionalRead(Packet *pkt) { return false; From c23d7bb3eed3a0f7146b4b9d462ea16cc1e8a036 Mon Sep 17 00:00:00 2001 From: Vishnu Ramadas Date: Wed, 4 Jan 2023 21:34:17 -0600 Subject: [PATCH 098/492] gpu-compute, mem-ruby: Add p_popRequestQueue to some transitions Two W->WI transitions, on events RdBlk and Atomic in the GPU L2 cache coherence protocol do not clear the request from the request queue upon completing the transition. This action is not performed in the response path. This update adds the p_popRequestQueue action to each of these transitions to remove the stale request from the queue.
Change-Id: Ia2679fe3dd702f4df2bc114f4607ba40c18d6ff1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67192 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/mem/ruby/protocol/GPU_VIPER-TCC.sm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm index ca4c543722..0f93339827 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm @@ -721,6 +721,7 @@ machine(MachineType:TCC, "TCC Cache") p_profileHit; t_allocateTBE; wb_writeBack; + p_popRequestQueue; } transition(I, RdBlk, IV) {TagArrayRead} { @@ -815,6 +816,7 @@ transition(I, Atomic, A) {TagArrayRead} { p_profileHit; t_allocateTBE; wb_writeBack; + p_popRequestQueue; } transition(I, WrVicBlk) {TagArrayRead} { From ac54c7ffad6d19820b2b875b58b0e3510d60f7e7 Mon Sep 17 00:00:00 2001 From: Hanhwi Jang Date: Thu, 5 Jan 2023 14:52:11 +0900 Subject: [PATCH 099/492] cpu-o3: Resolve the skid buffer overflow issue at decode stage When decode width is larger than fetch width, the skid buffer overflow happens at decode stage. The decode stage assumes that fetch stage sends instructions as many as the fetch width, but it sends them at decode width rate. This patch makes the decode stage set its skid buffer size according to the decode width. 
Change-Id: I90ee43d16c59a4c9305c77bbfad7e4cdb2b9cffa Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67231 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Reviewed-by: Hanhwi Jang Reviewed-by: Tom Rollet Tested-by: kokoro --- src/cpu/o3/decode.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc index 9555e32c23..ac728a2f2c 100644 --- a/src/cpu/o3/decode.cc +++ b/src/cpu/o3/decode.cc @@ -77,7 +77,7 @@ Decode::Decode(CPU *_cpu, const BaseO3CPUParams &params) decodeWidth, static_cast(MaxWidth)); // @todo: Make into a parameter - skidBufferMax = (fetchToDecodeDelay + 1) * params.fetchWidth; + skidBufferMax = (fetchToDecodeDelay + 1) * params.decodeWidth; for (int tid = 0; tid < MaxThreads; tid++) { stalls[tid] = {false}; decodeStatus[tid] = Idle; From 68cf65e9b581b49edaf4744520ca1586dfd6682f Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Fri, 6 Jan 2023 13:53:55 +0000 Subject: [PATCH 100/492] scons: Clone env before modifying it in SharedLib Without this, modifications to env propagate to unexpected places. This mirrors behaviour in Executable (where the code was copied from). Change-Id: I35bbf2f3cc2786eb50ff751c813853971ab284fe Signed-off-by: Nicolas Boichat Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67233 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Reviewed-by: Gabe Black Tested-by: kokoro --- src/SConscript | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/SConscript b/src/SConscript index 51b4bd9b3b..3179849601 100644 --- a/src/SConscript +++ b/src/SConscript @@ -376,6 +376,8 @@ class SharedLib(TopLevelBase): def declare(self, env): objs = self.srcs_to_objs(env, self.sources(env)) + env = env.Clone() + libs = self.libs(env) # Higher priority libraries should be earlier in the list.
libs.sort(key=lambda l: l.priority, reverse=True) From 8aa9f52953dfe5bd6bf53e6d509d06cc343534d5 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Thu, 5 Jan 2023 03:24:29 +0000 Subject: [PATCH 101/492] systemc: Add facilities to add extra SystemC message handlers Some clients (e.g. fastmodel integration) would like to catch specific warning messages from SystemC. Adding facilities to chain extra report handler (instead of just replacing the default one), that are run after the default/set handler. Change-Id: I8ef140fc897ae5eee0fc78c70caf081f625efbfd Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67234 Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- src/systemc/utils/report.cc | 25 +++++++++++++++++++++++++ src/systemc/utils/report.hh | 8 ++++++++ src/systemc/utils/sc_report_handler.cc | 4 ++++ 3 files changed, 37 insertions(+) diff --git a/src/systemc/utils/report.cc b/src/systemc/utils/report.cc index 2b15fced8c..5f3425f398 100644 --- a/src/systemc/utils/report.cc +++ b/src/systemc/utils/report.cc @@ -68,6 +68,31 @@ sc_core::sc_actions reportCatchActions = sc_core::SC_DISPLAY; sc_core::sc_report_handler_proc reportHandlerProc = &sc_core::sc_report_handler::default_handler; +namespace +{ + +std::list extraReportHandlerProcs; + +} // anonymous namespace + +const std::list & +getExtraSystemCReportHandlers() +{ + return extraReportHandlerProcs; +} + +void +addExtraSystemCReportHandler(sc_core::sc_report_handler_proc proc) +{ + extraReportHandlerProcs.push_back(proc); +} + +void +removeExtraSystemCReportHandler(sc_core::sc_report_handler_proc proc) +{ + extraReportHandlerProcs.remove(proc); +} + std::unique_ptr globalReportCache; bool reportWarningsAsErrors = false; diff --git a/src/systemc/utils/report.hh b/src/systemc/utils/report.hh index 1f12eef6a8..d7ea3401e3 100644 --- a/src/systemc/utils/report.hh +++ b/src/systemc/utils/report.hh @@ -29,6 +29,7 @@ #define __SYSTEMC_UTILS_REPORT_HH__ #include +#include #include #include 
#include @@ -103,6 +104,13 @@ extern sc_core::sc_actions reportCatchActions; extern sc_core::sc_report_handler_proc reportHandlerProc; +// gem5-specific support for extra SystemC report handlers. Called _after_ +// the default/set handler. +const std::list + &getExtraSystemCReportHandlers(); +void addExtraSystemCReportHandler(sc_core::sc_report_handler_proc proc); +void removeExtraSystemCReportHandler(sc_core::sc_report_handler_proc proc); + extern std::unique_ptr globalReportCache; extern bool reportWarningsAsErrors; diff --git a/src/systemc/utils/sc_report_handler.cc b/src/systemc/utils/sc_report_handler.cc index b893b1dff3..3421ab912d 100644 --- a/src/systemc/utils/sc_report_handler.cc +++ b/src/systemc/utils/sc_report_handler.cc @@ -103,6 +103,10 @@ sc_report_handler::report(sc_severity severity, const char *msg_type, } sc_gem5::reportHandlerProc(report, actions); + + for (auto& handler : sc_gem5::getExtraSystemCReportHandlers()) { + handler(report, actions); + } } void From f89973c9e1408bdbba23a3737c4929cb088834d1 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Thu, 5 Jan 2023 03:32:24 +0000 Subject: [PATCH 102/492] fastmodel: Add handler to catch DMI warnings Catch DMI warnings from fastmodel, and abort the simulation when they happen (instead of slowing down simulation). This is controlled by an exit_on_dmi_warning flag passed to fm.setup_simulation, defaulting to false. 
Change-Id: I07fbc9b2579989d40d601ff0b6af9bfe719309a1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67235 Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- src/arch/arm/fastmodel/arm_fast_model.py | 6 +++++- src/arch/arm/fastmodel/fastmodel.cc | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/arch/arm/fastmodel/arm_fast_model.py b/src/arch/arm/fastmodel/arm_fast_model.py index 11004177c6..d2d911f5b4 100644 --- a/src/arch/arm/fastmodel/arm_fast_model.py +++ b/src/arch/arm/fastmodel/arm_fast_model.py @@ -141,7 +141,11 @@ def scx_get_min_sync_latency(arg=None): # This should be called once per simulation -def setup_simulation(sim_name, min_sync_latency=100.0 / 100000000): +def setup_simulation( + sim_name, min_sync_latency=100.0 / 100000000, exit_on_dmi_warning=False +): set_armlmd_license_file() scx_initialize(sim_name) scx_set_min_sync_latency(min_sync_latency) + if exit_on_dmi_warning: + _m5.arm_fast_model.gem5.enable_exit_on_dmi_warning_handler() diff --git a/src/arch/arm/fastmodel/fastmodel.cc b/src/arch/arm/fastmodel/fastmodel.cc index 33a0c43f87..2edf1fa0c3 100644 --- a/src/arch/arm/fastmodel/fastmodel.cc +++ b/src/arch/arm/fastmodel/fastmodel.cc @@ -37,9 +37,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "base/logging.hh" #include "python/pybind11/pybind.hh" #include "scx/scx.h" #include "sim/init.hh" +#include "systemc/utils/report.hh" namespace gem5 { @@ -47,6 +49,21 @@ namespace gem5 namespace { +void +fastmodel_sc_report_handler( + const sc_core::sc_report &report, const sc_core::sc_actions &actions) +{ + const char *msg = report.get_msg(); + if (!msg) + return; + + panic_if( + strstr(msg, "Simulation code-translation cache failed to gain DMI") || + strstr(msg, "I-side given unusable DMI"), + "DMI warning from fastmodel, " + "aborting simulation instead of running slowly."); +} + void arm_fast_model_pybind(pybind11::module_ &m_internal) { @@ -118,6 +135,12 @@ arm_fast_model_pybind(pybind11::module_ &m_internal) static_cast *)>( &scx::scx_get_min_sync_latency)) ; + + // submodule for gem5-specific functions + auto gem5 = arm_fast_model.def_submodule("gem5"); + gem5.def("enable_exit_on_dmi_warning_handler", []() { + sc_gem5::addExtraSystemCReportHandler(fastmodel_sc_report_handler); + }); } EmbeddedPyBind embed_("arm_fast_model", &arm_fast_model_pybind); From 24e2ef0b7808f971f3ea651d9946770b3a2a9055 Mon Sep 17 00:00:00 2001 From: Matt Sinclair Date: Sat, 7 Jan 2023 16:01:57 -0600 Subject: [PATCH 103/492] mem-ruby, gpu-compute: fix TCP GLC cache bypassing 66d4a158 added support for AMD's GPU cache bypassing flags (GLC for bypassing L1 caches, SLC for bypassing all caches). However, for applications that use the GLC flag but intermix GLC- and non-GLC accesses to the same address, this previous commit has a bug. This bug manifests when the address is currently valid in the L1 (TCP). In this case, the previous commit chose to evict the line before letting the bypassing access to proceed. However, to do this the previous commit was using the inv_invDone action as part of the process of evicting it. This action is only intended to be called when load acquires are being performed (i.e., when the entire L1 cache is being flash invalidated). 
Thus, calling inv_invDone for a GLC (or SLC) bypassing request caused an assert failure since the bypassing request was not performing a load acquire. This commit resolves this by changing the support in this case to simply invalidate the entry in the cache. Change-Id: Ibaa4976f8714ac93650020af1c0ce2b6732c95a2 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67199 Reviewed-by: Jason Lowe-Power Tested-by: kokoro Maintainer: Jason Lowe-Power --- src/mem/ruby/protocol/GPU_VIPER-TCP.sm | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm index 3be1397d49..14bdcecbc2 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm @@ -614,7 +614,6 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") // request to L2. transition(V, LoadBypassEvict, I) {TagArrayRead, TagArrayWrite} { uu_profileDataMiss; - inv_invDone; ic_invCache; n_issueRdBlk; p_popMandatoryQueue; From 1d467bed7f6f2b5a362fa5e2bf739ca3cc239d82 Mon Sep 17 00:00:00 2001 From: Matt Sinclair Date: Sat, 7 Jan 2023 16:43:20 -0600 Subject: [PATCH 104/492] mem-ruby: fix TCP spacing/spelling Change-Id: I3fd9009592c8716a3da19dcdccf68f16af6522ef Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67200 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/mem/ruby/protocol/GPU_VIPER-TCP.sm | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm index 14bdcecbc2..6a977c4e9b 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm @@ -261,7 +261,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") // If L1 is disabled or requests have GLC or SLC flag set, // then, the requests should not cache in the L1. 
The response // from L2/global memory should bypass the cache - trigger(Event:Bypass, in_msg.addr, cache_entry, tbe); + trigger(Event:Bypass, in_msg.addr, cache_entry, tbe); } else { if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.addr)) { trigger(Event:TCC_Ack, in_msg.addr, cache_entry, tbe); @@ -288,7 +288,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") DPRINTF(RubySlicc, "%s\n", in_msg); if (in_msg.Type == RubyRequestType:LD) { if ((in_msg.isGLCSet || in_msg.isSLCSet) && is_valid(cache_entry)) { - // Read rquests with GLC or SLC bit set should not cache in the L1. + // Read requests with GLC or SLC bit set should not cache in the L1. // They need to bypass the L1 and go to the L2. If an entry exists // in the L1, it needs to be evicted trigger(Event:LoadBypassEvict, in_msg.LineAddress, cache_entry, tbe); @@ -609,15 +609,15 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") p_popMandatoryQueue; } -// Transition to be called when a load request with GLC or SLC flag set arrives -// at L1. This transition invalidates any existing entry and forwards the -// request to L2. + // Transition to be called when a load request with GLC or SLC flag set arrives + // at L1. This transition invalidates any existing entry and forwards the + // request to L2. transition(V, LoadBypassEvict, I) {TagArrayRead, TagArrayWrite} { uu_profileDataMiss; ic_invCache; n_issueRdBlk; p_popMandatoryQueue; -} + } transition({V, I}, Atomic, A) {TagArrayRead, TagArrayWrite} { t_allocateTBE; From 4e61a9833693283265b338da1696bfea93762f50 Mon Sep 17 00:00:00 2001 From: Matt Sinclair Date: Sat, 7 Jan 2023 16:44:25 -0600 Subject: [PATCH 105/492] mem-ruby: add GPU cache bypass I->I transition 66d4a158 added support for AMD's GPU cache bypassing flags (GLC for bypassing L1 caches, SLC for bypassing all caches). However, it did not add a transition for the situation where the cache line is currently I (Invalid). 
This commit adds this support, which resolves an assert failure in Pannotia workloads when this situation arises. Change-Id: I59a62ce70c01dd8b73aacb733fb3d1d0dab2624b Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67201 Reviewed-by: Jason Lowe-Power Tested-by: kokoro Maintainer: Jason Lowe-Power --- src/mem/ruby/protocol/GPU_VIPER-TCP.sm | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm index 6a977c4e9b..7e0ad4ed96 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm @@ -619,6 +619,15 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") p_popMandatoryQueue; } + // Transition to be called when a load request with GLC or SLC flag set arrives + // at L1. Since the entry is invalid, there isn't anything to forward to L2, + // so just issue read. + transition(I, LoadBypassEvict) {TagArrayRead, TagArrayWrite} { + uu_profileDataMiss; + n_issueRdBlk; + p_popMandatoryQueue; + } + transition({V, I}, Atomic, A) {TagArrayRead, TagArrayWrite} { t_allocateTBE; mru_updateMRU; From d6bbccb60a656cc63a8ccb9800672aab411d723b Mon Sep 17 00:00:00 2001 From: Vishnu Ramadas Date: Mon, 9 Jan 2023 12:48:42 -0600 Subject: [PATCH 106/492] gpu-compute : Fix incorrect TLB stats when FunctionalTLB is used When FunctionalTLB is used in SE mode, the stats tlbLatency and tlbCycles report negative values. 
This patch fixes it by disabling the updates that result in negative values when FunctionalTLB is set to true Change-Id: I6962785fc1730b166b6d5b879e9c7618a8d6d4b3 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67202 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Maintainer: Matthew Poremba Reviewed-by: Matthew Poremba Tested-by: kokoro --- src/gpu-compute/compute_unit.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index 62cfbf94cf..06fe28f5b8 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -1078,7 +1078,9 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt) fatal("pkt is not a read nor a write\n"); } - stats.tlbCycles -= curTick(); + if (!functionalTLB) { + stats.tlbCycles -= curTick(); + } ++stats.tlbRequests; PortID tlbPort_index = perLaneTLB ? index : 0; From 3f2c55cb63adfe702c8f6b30f879ae3c926d0a9a Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Mon, 9 Jan 2023 14:04:06 +0800 Subject: [PATCH 107/492] arch-riscv: Check RISCV process run in matched CPU 1. Remove set RV32 flag in RiscvProcess32 2. 
Check if binary run appropriate CPU Change-Id: I00b0725f3eb4f29e45b8ec719317af79355dc728 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67251 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/arch/riscv/process.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/arch/riscv/process.cc b/src/arch/riscv/process.cc index dc7abae790..cd00f5d63a 100644 --- a/src/arch/riscv/process.cc +++ b/src/arch/riscv/process.cc @@ -101,8 +101,12 @@ RiscvProcess64::initState() Process::initState(); argsInit(PageBytes); - for (ContextID ctx: contextIds) - system->threads[ctx]->setMiscRegNoEffect(MISCREG_PRV, PRV_U); + for (ContextID ctx: contextIds) { + auto *tc = system->threads[ctx]; + tc->setMiscRegNoEffect(MISCREG_PRV, PRV_U); + auto *isa = dynamic_cast(tc->getIsaPtr()); + fatal_if(isa->rvType() != RV64, "RISC V CPU should run in 64 bits mode"); + } } void @@ -114,9 +118,8 @@ RiscvProcess32::initState() for (ContextID ctx: contextIds) { auto *tc = system->threads[ctx]; tc->setMiscRegNoEffect(MISCREG_PRV, PRV_U); - PCState pc = tc->pcState().as(); - pc.rvType(RV32); - tc->pcState(pc); + auto *isa = dynamic_cast(tc->getIsaPtr()); + fatal_if(isa->rvType() != RV32, "RISC V CPU should run in 32 bits mode"); } } From 7c670c16675cd0fa155d04c8966b9b02ca53593d Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Tue, 3 Jan 2023 14:00:32 +0800 Subject: [PATCH 108/492] arch-riscv: Correct interrupt order In Section 3.1.14 of Volume II Riscv Spec., the interrupt order should be MEI, MSI, MTI, SEI, SSI, STI and so on. 
issues: https://gem5.atlassian.net/browse/GEM5-889 Change-Id: I357c86eecd74e9e65bbfd3d4d31e68bc276f8760 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67211 Maintainer: Jason Lowe-Power Reviewed-by: Yu-hsin Wang Tested-by: kokoro Reviewed-by: Jui-min Lee Reviewed-by: Jason Lowe-Power --- src/arch/riscv/interrupts.hh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arch/riscv/interrupts.hh b/src/arch/riscv/interrupts.hh index f10c5f386a..a1ee396cd4 100644 --- a/src/arch/riscv/interrupts.hh +++ b/src/arch/riscv/interrupts.hh @@ -125,9 +125,9 @@ class Interrupts : public BaseInterrupts return std::make_shared(); std::bitset mask = globalMask(); const std::vector interrupt_order { - INT_EXT_MACHINE, INT_TIMER_MACHINE, INT_SOFTWARE_MACHINE, - INT_EXT_SUPER, INT_TIMER_SUPER, INT_SOFTWARE_SUPER, - INT_EXT_USER, INT_TIMER_USER, INT_SOFTWARE_USER + INT_EXT_MACHINE, INT_SOFTWARE_MACHINE, INT_TIMER_MACHINE, + INT_EXT_SUPER, INT_SOFTWARE_SUPER, INT_TIMER_SUPER, + INT_EXT_USER, INT_SOFTWARE_USER, INT_TIMER_USER }; for (const int &id : interrupt_order) if (checkInterrupt(id) && mask[id]) From 626e445563a80b67150a5e0bc5d55f1b393762e7 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 10 Jan 2023 05:02:05 -0800 Subject: [PATCH 109/492] dev: Add a "resetter" callback to the typed register class. When using the typed register template, most functionality of the class can be controlled using callbacks. For instance, callbacks can be installed to handle reads or writes to a register without having to subclass the template and override those methods using inheritance. The recently added reset() method did not follow this pattern though, which has two problems. First, it's inconsistent with how the class is normally used. Second, once you've defined a subclass, the reader, writer, etc, callbacks still expect the type of the original class. 
That means these have to either awkwardly use a type different from the actual real type of the register, or use awkward, inefficient, and/or dangerous casting to get back to the true type. To address these problems, this change adds a resetter(...) method which works like the reader(...) or writer(...) methods to optionally install a callback to implement any special reset behavior. Change-Id: Ia74b36616fd459c1dbed9304568903a76a4b55de Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67203 Tested-by: kokoro Reviewed-by: Yu-hsin Wang Maintainer: Gabe Black --- src/dev/reg_bank.hh | 40 +++++++++++++++++++++++++++++++- src/dev/reg_bank.test.cc | 50 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/src/dev/reg_bank.hh b/src/dev/reg_bank.hh index 32d9058a15..3d8dc576cb 100644 --- a/src/dev/reg_bank.hh +++ b/src/dev/reg_bank.hh @@ -270,6 +270,12 @@ * is an alternative form of update which also takes a custom bitmask, if you * need to update bits other than the normally writeable ones. * + * Similarly, you can set a "resetter" handler which is responsible for + * resetting the register. It takes a reference to the current Register, and + * no other parameters. The "initialValue" accessor can retrieve the value the + * register was constructed with. The register is simply set to this value + * in the default resetter implementation. + * * = Read only bits = * * Often registers have bits which are fixed and not affected by writes. 
To @@ -554,6 +560,7 @@ class RegisterBank : public RegisterBankBase using WriteFunc = std::function<void (This &reg, const Data &value)>; using PartialWriteFunc = std::function< void (This &reg, const Data &value, int first, int last)>; + using ResetFunc = std::function<void (This &reg)>; private: Data _data = {}; @@ -564,6 +571,7 @@ class RegisterBank : public RegisterBankBase WriteFunc _writer = defaultWriter; PartialWriteFunc _partialWriter = defaultPartialWriter; PartialReadFunc _partialReader = defaultPartialReader; + ResetFunc _resetter = defaultResetter; protected: static Data defaultReader(This &reg) { return reg.get(); } @@ -587,6 +595,12 @@ class RegisterBank : public RegisterBankBase mask(first, last))); } + static void + defaultResetter(This &reg) + { + reg.get() = reg.initialValue(); + } + constexpr Data htoreg(Data data) { @@ -721,6 +735,30 @@ class RegisterBank : public RegisterBankBase return partialWriter(wrapper); } + // Set the callables which handle resetting. + // + // The default resetter restores the initial value used in the + // constructor. + constexpr This & + resetter(const ResetFunc &new_resetter) + { + _resetter = new_resetter; + return *this; + } + template <class Parent, class... Args> + constexpr This & + resetter(Parent *parent, void (Parent::*nr)(Args... args)) + { + auto wrapper = [parent, nr](Args&&... args) { + return (parent->*nr)(std::forward<Args>(args)...); + }; + return resetter(wrapper); + } + + // An accessor which returns the initial value as set in the + // constructor. This is intended to be used in a resetter function. + const Data &initialValue() const { return _resetData; } + /* * Interface for accessing the register's state, for use by the @@ -817,7 +855,7 @@ class RegisterBank : public RegisterBankBase } // Reset our data to its initial value.
- void reset() override { get() = _resetData; } + void reset() override { _resetter(*this); } }; private: diff --git a/src/dev/reg_bank.test.cc b/src/dev/reg_bank.test.cc index b4bc969724..4439526e35 100644 --- a/src/dev/reg_bank.test.cc +++ b/src/dev/reg_bank.test.cc @@ -868,6 +868,56 @@ TEST_F(TypedRegisterTest, PartialWriterReaderWriter) EXPECT_EQ(write_value, 0x0344); } +// Use the default resetter for a register. +TEST_F(TypedRegisterTest, DefaultResetter) +{ + BackingType initial_value = reg.get(); + + reg.get() = initial_value + 1; + EXPECT_EQ(reg.get(), initial_value + 1); + + reg.reset(); + + EXPECT_EQ(reg.get(), initial_value); +} + +// Set a custom resetter for a register. +TEST_F(TypedRegisterTest, Resetter) +{ + RegisterBankLE::Register<BackingType> *reg_ptr = nullptr; + + reg.resetter([&reg_ptr](auto &r) { + reg_ptr = &r; + }); + + reg.reset(); + + EXPECT_EQ(reg_ptr, &reg); +} + +// Set a custom resetter for a register which is a class method. +TEST_F(TypedRegisterTest, ResetterMF) +{ + using Reg = RegisterBankLE::Register<BackingType>; + + struct ResetStruct + { + Reg *reg_ptr = nullptr; + + void + resetter(Reg &r) + { + reg_ptr = &r; + } + } reset_struct; + + reg.resetter(&reset_struct, &ResetStruct::resetter); + + reg.reset(); + + EXPECT_EQ(reset_struct.reg_ptr, &reg); +} + TEST_F(TypedRegisterTest, Serialize) { std::ostringstream os; From a7ef5b77d6b8399437865261f862c286f83b0a85 Mon Sep 17 00:00:00 2001 From: Yu-hsin Wang Date: Wed, 11 Jan 2023 13:19:10 +0800 Subject: [PATCH 110/492] mem: Implement backdoor interface for Bridge Change-Id: I5ff62b03c34e41395a957a0799925ddd9c275458 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67291 Reviewed-by: Nicolas Boichat Tested-by: kokoro Maintainer: Gabe Black Reviewed-by: Gabe Black --- src/mem/bridge.cc | 15 +++++++++++++++ src/mem/bridge.hh | 28 ++++++++++++++++++++-------- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index 0f744f7336..36832ebfc4 100644 ---
a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -347,6 +347,14 @@ Bridge::BridgeResponsePort::recvAtomic(PacketPtr pkt) return delay * bridge.clockPeriod() + memSidePort.sendAtomic(pkt); } +Tick +Bridge::BridgeResponsePort::recvAtomicBackdoor( + PacketPtr pkt, MemBackdoorPtr &backdoor) +{ + return delay * bridge.clockPeriod() + memSidePort.sendAtomicBackdoor( + pkt, backdoor); +} + void Bridge::BridgeResponsePort::recvFunctional(PacketPtr pkt) { @@ -371,6 +379,13 @@ Bridge::BridgeResponsePort::recvFunctional(PacketPtr pkt) memSidePort.sendFunctional(pkt); } +void +Bridge::BridgeResponsePort::recvMemBackdoorReq( + const MemBackdoorReq &req, MemBackdoorPtr &backdoor) +{ + memSidePort.sendMemBackdoorReq(req, backdoor); +} + bool Bridge::BridgeRequestPort::trySatisfyFunctional(PacketPtr pkt) { diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh index f56cef115f..e4a6837861 100644 --- a/src/mem/bridge.hh +++ b/src/mem/bridge.hh @@ -195,23 +195,35 @@ class Bridge : public ClockedObject /** When receiving a timing request from the peer port, pass it to the bridge. */ - bool recvTimingReq(PacketPtr pkt); + bool recvTimingReq(PacketPtr pkt) override; /** When receiving a retry request from the peer port, pass it to the bridge. */ - void recvRespRetry(); + void recvRespRetry() override; - /** When receiving a Atomic requestfrom the peer port, + /** When receiving an Atomic request from the peer port, pass it to the bridge. */ - Tick recvAtomic(PacketPtr pkt); + Tick recvAtomic(PacketPtr pkt) override; + + /** When receiving an Atomic backdoor request from the peer port, + pass it to the bridge. */ + Tick recvAtomicBackdoor( + PacketPtr pkt, MemBackdoorPtr &backdoor) override; + /** When receiving a Functional request from the peer port, pass it to the bridge. */ - void recvFunctional(PacketPtr pkt); + void recvFunctional(PacketPtr pkt) override; + + /** When receiving a Functional backdoor request from the peer port, + pass it to the bridge. 
*/ + void recvMemBackdoorReq( + const MemBackdoorReq &req, MemBackdoorPtr &backdoor) override; + /** When receiving a address range request the peer port, pass it to the bridge. */ - AddrRangeList getAddrRanges() const; + AddrRangeList getAddrRanges() const override; }; @@ -303,11 +315,11 @@ class Bridge : public ClockedObject /** When receiving a timing request from the peer port, pass it to the bridge. */ - bool recvTimingResp(PacketPtr pkt); + bool recvTimingResp(PacketPtr pkt) override; /** When receiving a retry request from the peer port, pass it to the bridge. */ - void recvReqRetry(); + void recvReqRetry() override; }; /** Response port of the bridge. */ From a2658f08e5163de3b4c182633e9c730176fac4e5 Mon Sep 17 00:00:00 2001 From: Earl Ou Date: Tue, 10 Jan 2023 00:27:53 -0800 Subject: [PATCH 111/492] systemc: fix -Wno-free-nonheap-object for building scheduler.cc -Wno-free-nonheap-object can happen at compile or link time depending on the versions. To better disable this false alarm, we move the memory management part into .cc file, so the check is always done at link time. This change also removes the global flags so other code is still checked with the flags. Change-Id: I8f1e20197b25c90b5f439e2ecc474bd99e4f82ed Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67237 Tested-by: kokoro Reviewed-by: Yu-hsin Wang Maintainer: Gabe Black --- SConstruct | 4 ---- src/sim/eventq.cc | 26 ++++++++++++++++++++++++++ src/sim/eventq.hh | 18 ++++-------------- src/systemc/core/SConscript | 6 ++---- 4 files changed, 32 insertions(+), 22 deletions(-) diff --git a/SConstruct b/SConstruct index bd26e4552e..e08c2984e5 100755 --- a/SConstruct +++ b/SConstruct @@ -447,10 +447,6 @@ for variant_path in variant_paths: error('gcc version 7 or newer required.\n' 'Installed version:', env['CXXVERSION']) - with gem5_scons.Configure(env) as conf: - # This warning has a false positive in the systemc in g++ 11.1. 
- conf.CheckCxxFlag('-Wno-free-nonheap-object') - # Add the appropriate Link-Time Optimization (LTO) flags if # `--with-lto` is set. if GetOption('with_lto'): diff --git a/src/sim/eventq.cc b/src/sim/eventq.cc index 66d03854ac..23ca2f6f4e 100644 --- a/src/sim/eventq.cc +++ b/src/sim/eventq.cc @@ -108,6 +108,32 @@ Event::insertBefore(Event *event, Event *curr) return event; } +void +Event::acquire() +{ + if (flags.isSet(Event::Managed)) + acquireImpl(); +} + +void +Event::release() +{ + if (flags.isSet(Event::Managed)) + releaseImpl(); +} + +void +Event::acquireImpl() +{ +} + +void +Event::releaseImpl() +{ + if (!scheduled()) + delete this; +} + void EventQueue::insert(Event *event) { diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh index cd5d285f93..62495bf86d 100644 --- a/src/sim/eventq.hh +++ b/src/sim/eventq.hh @@ -381,26 +381,16 @@ class Event : public EventBase, public Serializable /** * Managed event scheduled and being held in the event queue. */ - void acquire() - { - if (flags.isSet(Event::Managed)) - acquireImpl(); - } + void acquire(); /** * Managed event removed from the event queue. */ - void release() { - if (flags.isSet(Event::Managed)) - releaseImpl(); - } + void release(); - virtual void acquireImpl() {} + virtual void acquireImpl(); - virtual void releaseImpl() { - if (!scheduled()) - delete this; - } + virtual void releaseImpl(); /** @} */ diff --git a/src/systemc/core/SConscript b/src/systemc/core/SConscript index 2b8811187c..c7c9dbb40b 100644 --- a/src/systemc/core/SConscript +++ b/src/systemc/core/SConscript @@ -40,6 +40,7 @@ if env['CONF']['USE_SYSTEMC']: Source('port.cc') Source('process.cc') Source('sched_event.cc') + Source('scheduler.cc') Source('sensitivity.cc') Source('time.cc') @@ -75,7 +76,4 @@ if env['CONF']['USE_SYSTEMC']: # Disable the false positive warning for the event members of the scheduler. 
with gem5_scons.Configure(env) as conf: flag = '-Wno-free-nonheap-object' - append = {} - if conf.CheckCxxFlag(flag, autoadd=False): - append['CCFLAGS'] = [flag] - Source('scheduler.cc', append=append) + conf.CheckLinkFlag(flag) From 4954167fe51c3072229185356ffb1e55d5eb9f41 Mon Sep 17 00:00:00 2001 From: Earl Ou Date: Wed, 4 Jan 2023 19:48:18 -0800 Subject: [PATCH 112/492] mem: create port_wrapper classes The port_wrapper classes convert the Request/ResponsePort from inherit-base to callback registrations. This help 'composition over inheritance' that most design pattern follows, which help reducing code length and increase reusability. Change-Id: Ia13cc62507ac8425bd7cf143a2e080d041c173f9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67232 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/mem/SConscript | 1 + src/mem/port_wrapper.cc | 169 ++++++++++++++++++++++++++++++++++++++++ src/mem/port_wrapper.hh | 159 +++++++++++++++++++++++++++++++++++++ 3 files changed, 329 insertions(+) create mode 100644 src/mem/port_wrapper.cc create mode 100644 src/mem/port_wrapper.hh diff --git a/src/mem/SConscript b/src/mem/SConscript index 3bcfc0d9c5..ca164c1e27 100644 --- a/src/mem/SConscript +++ b/src/mem/SConscript @@ -88,6 +88,7 @@ Source('packet.cc') Source('port.cc') Source('packet_queue.cc') Source('port_proxy.cc') +Source('port_wrapper.cc') Source('physical.cc') Source('shared_memory_server.cc') Source('simple_mem.cc') diff --git a/src/mem/port_wrapper.cc b/src/mem/port_wrapper.cc new file mode 100644 index 0000000000..fd5ebbd614 --- /dev/null +++ b/src/mem/port_wrapper.cc @@ -0,0 +1,169 @@ +/* + * Copyright 2023 Google, LLC. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "mem/port_wrapper.hh" + +namespace gem5 +{ + +RequestPortWrapper::RequestPortWrapper(const std::string& name, + SimObject* _owner, PortID id) + : RequestPort(name, _owner, id) +{ +} + +void +RequestPortWrapper::recvRangeChange() +{ + if (!recvRangeChangeCb) { + RequestPort::recvRangeChange(); + return; + } + recvRangeChangeCb(); +} + +bool +RequestPortWrapper::recvTimingResp(PacketPtr packet) +{ + panic_if(!recvTimingRespCb, "RecvTimingRespCallback is empty."); + return recvTimingRespCb(packet); +} + +void +RequestPortWrapper::recvReqRetry() +{ + panic_if(!recvReqRetryCb, "RecvReqRetryCallback is empty."); + recvReqRetryCb(); +} + +void +RequestPortWrapper::setRangeChangeCallback(RecvReqRetryCallback cb) +{ + recvRangeChangeCb = std::move(cb); +} + +void +RequestPortWrapper::setTimingCallbacks(RecvTimingRespCallback resp_cb, + RecvReqRetryCallback retry_cb) +{ + recvTimingRespCb = std::move(resp_cb); + recvReqRetryCb = std::move(retry_cb); +} + +ResponsePortWrapper::ResponsePortWrapper(const std::string& name, + SimObject* _owner, PortID id) + : ResponsePort(name, _owner, id) +{ +} + +AddrRangeList +ResponsePortWrapper::getAddrRanges() const +{ + panic_if(!getAddrRangesCb, "GetAddrRangesCallback is empty."); + return getAddrRangesCb(); +} + +bool +ResponsePortWrapper::recvTimingReq(PacketPtr packet) +{ + panic_if(!recvTimingReqCb, "RecvTimingReqCallback is empty."); + return recvTimingReqCb(packet); +} + +void +ResponsePortWrapper::recvRespRetry() +{ + panic_if(!recvRespRetryCb, "RecvRespRetryCallback is empty."); + recvRespRetryCb(); +} + +Tick +ResponsePortWrapper::recvAtomic(PacketPtr packet) +{ + panic_if(!recvAtomicCb, "RecvAtomicCallback is empty."); + return recvAtomicCb(packet); +} + +Tick +ResponsePortWrapper::recvAtomicBackdoor(PacketPtr packet, + MemBackdoorPtr& backdoor) +{ + if (!recvAtomicBackdoorCb) { + return ResponsePort::recvAtomicBackdoor(packet, backdoor); + } + return recvAtomicBackdoorCb(packet, backdoor); +} + +void 
+ResponsePortWrapper::recvFunctional(PacketPtr packet) +{ + panic_if(!recvFunctionalCb, "RecvFunctionalCallback is empty."); + recvFunctionalCb(packet); +} + +void +ResponsePortWrapper::recvMemBackdoorReq(const MemBackdoorReq& req, + MemBackdoorPtr& backdoor) +{ + if (!recvMemBackdoorReqCb) { + ResponsePort::recvMemBackdoorReq(req, backdoor); + return; + } + recvMemBackdoorReqCb(req, backdoor); +} + +void +ResponsePortWrapper::setGetAddrRangesCallback(GetAddrRangesCallback cb) +{ + getAddrRangesCb = std::move(cb); +} + +void +ResponsePortWrapper::setTimingCallbacks(RecvTimingReqCallback timing_cb, + RecvRespRetryCallback retry_cb) +{ + recvTimingReqCb = std::move(timing_cb); + recvRespRetryCb = std::move(retry_cb); +} + +void +ResponsePortWrapper::setAtomicCallbacks(RecvAtomicCallback atomic_cb, + RecvAtomicBackdoorCallback backdoor_cb) +{ + recvAtomicCb = std::move(atomic_cb); + recvAtomicBackdoorCb = std::move(backdoor_cb); +} + +void +ResponsePortWrapper::setFunctionalCallbacks( + RecvFunctionalCallback func_cb, RecvMemBackdoorReqCallback backdoor_cb) +{ + recvFunctionalCb = std::move(func_cb); + recvMemBackdoorReqCb = std::move(backdoor_cb); +} + +} // namespace gem5 diff --git a/src/mem/port_wrapper.hh b/src/mem/port_wrapper.hh new file mode 100644 index 0000000000..5dcdd5dc9b --- /dev/null +++ b/src/mem/port_wrapper.hh @@ -0,0 +1,159 @@ +/* + * Copyright 2023 Google, LLC.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * PortWrapper Object Declaration. + * + * The RequestPortWrapper and ResponsePortWrapper converts inherit-based + * RequestPort and ResponsePort into callback-based. This help reducing + * redundant code and increase code reusability in most cases, allowing + * composition over inheritance pattern. + * + * Example usage: + * + * class MySimObject : public SimObject + * { + * public: + * ResponsePortWrapper inPort; + * + * MySimObject(...) 
: inPort("in_port", this)... { + * inPort.setGetAddrRangesCallback([this]() { + * return getRange(); + * }); + * + * inPort.setAtomicCallbacks([this](PacketPtr packet) { + * // process the packet + * ... + * return Tick(); + * }); + * } + * + * private: + * AddrRangeList getRange() const {...} + * }; + */ + +#ifndef __MEM_PORT_WRAPPER_HH__ +#define __MEM_PORT_WRAPPER_HH__ + +#include <functional> + +#include "mem/port.hh" + +namespace gem5 +{ + +/** + * The RequestPortWrapper converts inherit-based RequestPort into + * callback-based. + */ +class RequestPortWrapper : public RequestPort +{ + public: + using RecvRangeChangeCallback = std::function<void()>; + // Timing Protocol + using RecvTimingRespCallback = std::function<bool(PacketPtr)>; + using RecvReqRetryCallback = std::function<void()>; + + RequestPortWrapper(const std::string& name, SimObject* _owner, + PortID id = InvalidPortID); + + void recvRangeChange() override; + + // TimingRequestProtocol + bool recvTimingResp(PacketPtr) override; + void recvReqRetry() override; + + void setRangeChangeCallback(RecvReqRetryCallback); + void setTimingCallbacks(RecvTimingRespCallback, RecvReqRetryCallback); + + private: + RecvRangeChangeCallback recvRangeChangeCb = nullptr; + RecvTimingRespCallback recvTimingRespCb = nullptr; + RecvReqRetryCallback recvReqRetryCb = nullptr; +}; + +/** + * The ResponsePortWrapper converts inherit-based ResponsePort into + * callback-based.
+ */ +class ResponsePortWrapper : public ResponsePort +{ + public: + using GetAddrRangesCallback = std::function<AddrRangeList()>; + // Timing Protocol + using RecvTimingReqCallback = std::function<bool(PacketPtr)>; + // Atomic Protocol + using RecvAtomicCallback = std::function<Tick(PacketPtr)>; + using RecvAtomicBackdoorCallback = + std::function<Tick(PacketPtr, MemBackdoorPtr&)>; + + // Functional Protocol + using RecvFunctionalCallback = std::function<void(PacketPtr)>; + using RecvMemBackdoorReqCallback = + std::function<void(const MemBackdoorReq&, MemBackdoorPtr&)>; + + using RecvRespRetryCallback = std::function<void()>; + + ResponsePortWrapper(const std::string& name, SimObject* _owner, + PortID id = InvalidPortID); + + AddrRangeList getAddrRanges() const override; + + // TimingResponseProtocol + bool recvTimingReq(PacketPtr) override; + void recvRespRetry() override; + + // AtomicResponseProtocol + Tick recvAtomic(PacketPtr) override; + Tick recvAtomicBackdoor(PacketPtr, MemBackdoorPtr&) override; + + // FunctionalResponseProtocol + void recvFunctional(PacketPtr) override; + void recvMemBackdoorReq(const MemBackdoorReq&, MemBackdoorPtr&) override; + + void setGetAddrRangesCallback(GetAddrRangesCallback); + void setTimingCallbacks(RecvTimingReqCallback, RecvRespRetryCallback); + void setAtomicCallbacks(RecvAtomicCallback, + RecvAtomicBackdoorCallback = nullptr); + void setFunctionalCallbacks(RecvFunctionalCallback, + RecvMemBackdoorReqCallback = nullptr); + + private: + GetAddrRangesCallback getAddrRangesCb = nullptr; + RecvTimingReqCallback recvTimingReqCb = nullptr; + RecvRespRetryCallback recvRespRetryCb = nullptr; + RecvAtomicCallback recvAtomicCb = nullptr; + RecvAtomicBackdoorCallback recvAtomicBackdoorCb = nullptr; + RecvFunctionalCallback recvFunctionalCb = nullptr; + RecvMemBackdoorReqCallback recvMemBackdoorReqCb = nullptr; +}; + +} // namespace gem5 + +#endif //__MEM_PORT_WRAPPER_HH__ From 8149245eccff03ebb716ff8d8ae755905bf4801a Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Mon, 24 Oct 2022 17:28:06 +0100 Subject: [PATCH 113/492] cpu: Formalize a CPU cluster class in the gem5 standard
library Currently the gem5 standard library does not define a class to represent a cluster of CPUs. The SubSystem class has been extended in some python modules [1] to define clock/voltage domains shared by a group of CPUs (the cluster), and to provide some utility functions for top level configs. This patch is moving the aforementioned class within the gem5 standard library, to let other ISAs and scripts make use of it. Adding a cpu cluster class to the gem5 library will have the benefit of standardizing the interface to cpus in the toplevel configs Most of the new class still resides in the python world: we want the class to be as generic as possible and we want to make its use optional [1]: https://github.com/gem5/gem5/blob/v22.0.0.0/\ configs/example/arm/devices.py#L96 Change-Id: Idb05263a244e28bffa9eac811c6deb62ebb76a74 Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65891 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/cpu/CpuCluster.py | 93 +++++++++++++++++++++++++++++++++++++++++++ src/cpu/SConscript | 1 + src/cpu/cluster.hh | 58 +++++++++++++++++++++++++++ 3 files changed, 152 insertions(+) create mode 100644 src/cpu/CpuCluster.py create mode 100644 src/cpu/cluster.hh diff --git a/src/cpu/CpuCluster.py b/src/cpu/CpuCluster.py new file mode 100644 index 0000000000..31fdc4977d --- /dev/null +++ b/src/cpu/CpuCluster.py @@ -0,0 +1,93 @@ +# Copyright (c) 2022 Arm Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. 
You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from m5.params import * +from m5.objects.SubSystem import SubSystem + + +class CpuCluster(SubSystem): + type = "CpuCluster" + cxx_header = "cpu/cluster.hh" + cxx_class = "gem5::CpuCluster" + + _NUM_CPUS = 0 + _NUM_CLUSTERS = 0 + + voltage_domain = Param.VoltageDomain("Voltage domain") + clk_domain = Param.ClockDomain("Clock domain") + + def __iter__(self): + return iter(self.cpus) + + def generate_cpus(self, cpu_type: "BaseCPU", num_cpus: int): + """ + Instantiates the cpus within the cluster provided + their type and their number. + + :param cpu_type: The cpu class + :param num_cpus: The number of cpus within the cluster + """ + self.cpus = [ + cpu_type( + cpu_id=CpuCluster._NUM_CPUS + idx, clk_domain=self.clk_domain + ) + for idx in range(num_cpus) + ] + + for cpu in self.cpus: + cpu.createThreads() + cpu.createInterruptController() + cpu.socket_id = CpuCluster._NUM_CLUSTERS + + # "Register" the cluster/cpus by augmenting the + # class variables + CpuCluster._NUM_CPUS += num_cpus + CpuCluster._NUM_CLUSTERS += 1 + + def connect(self, membus: "SystemXBar"): + """ + Connects every cpu within the cluster with the + provided bus + + :param membus: The system crossbar + """ + for cpu in self.cpus: + cpu.connectBus(membus) + + def memory_mode(self) -> "MemoryMode": + return type(self.cpus[0]).memory_mode() + + def require_caches(self) -> bool: + return type(self.cpus[0]).require_caches() diff --git a/src/cpu/SConscript b/src/cpu/SConscript index 0466f11433..d6dcd2f6ea 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -93,6 +93,7 @@ Source('inst_pb_trace.cc', tags='protobuf') SimObject('CheckerCPU.py', sim_objects=['CheckerCPU']) SimObject('BaseCPU.py', sim_objects=['BaseCPU']) +SimObject('CpuCluster.py', sim_objects=['CpuCluster']) SimObject('CPUTracers.py', sim_objects=[ 'ExeTracer', 'IntelTrace', 'NativeTrace']) SimObject('TimingExpr.py', sim_objects=[ diff --git a/src/cpu/cluster.hh b/src/cpu/cluster.hh new file mode 100644 index 0000000000..623378ac79
--- /dev/null +++ b/src/cpu/cluster.hh @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2022 Arm Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_CLUSTER_HH__ +#define __CPU_CLUSTER_HH__ + +#include "sim/sub_system.hh" +#include "params/CpuCluster.hh" + +namespace gem5 +{ + +class CpuCluster : public SubSystem +{ + public: + PARAMS(CpuCluster); + CpuCluster(const Params &p) + : SubSystem(p) + {} +}; + +} // namespace gem5 + +#endif // __CPU_CLUSTER_HH__ From 899f702f122b80f7f0cb24a9a04015cef7daa1b5 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Tue, 25 Oct 2022 09:31:10 +0100 Subject: [PATCH 114/492] configs: Start using the new CpuCluster class in example/arm Change-Id: I061c6255449dd126cdd1a6935bea510ebe2e8e14 Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65892 Tested-by: kokoro Reviewed-by: Jason Lowe-Power Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power Reviewed-by: Yu-hsin Wang --- configs/example/arm/baremetal.py | 2 +- configs/example/arm/devices.py | 56 ++++++++--------------------- configs/example/arm/fs_bigLITTLE.py | 31 +++++++--------- configs/example/arm/ruby_fs.py | 2 +- configs/example/arm/starter_fs.py | 2 +- configs/example/arm/starter_se.py | 4 +-- 6 files changed, 32 insertions(+), 65 deletions(-) diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py index a8db6bacd4..fc630e5299 100644 --- a/configs/example/arm/baremetal.py +++ b/configs/example/arm/baremetal.py @@ -122,7 +122,7 @@ def create(args): # Add CPU clusters to the system 
system.cpu_cluster = [ - devices.CpuCluster( + devices.ArmCpuCluster( system, args.num_cores, args.cpu_freq, "1.0V", *cpu_types[args.cpu] ) ] diff --git a/configs/example/arm/devices.py b/configs/example/arm/devices.py index c6560d74dd..3f005a49aa 100644 --- a/configs/example/arm/devices.py +++ b/configs/example/arm/devices.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2017, 2019, 2021 Arm Limited +# Copyright (c) 2016-2017, 2019, 2021-2022 Arm Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -95,7 +95,7 @@ class MemBus(SystemXBar): default = Self.badaddr_responder.pio -class CpuCluster(SubSystem): +class ArmCpuCluster(CpuCluster): def __init__( self, system, @@ -107,7 +107,7 @@ class CpuCluster(SubSystem): l1d_type, l2_type, ): - super(CpuCluster, self).__init__() + super().__init__() self._cpu_type = cpu_type self._l1i_type = l1i_type self._l1d_type = l1d_type @@ -120,24 +120,9 @@ class CpuCluster(SubSystem): clock=cpu_clock, voltage_domain=self.voltage_domain ) - self.cpus = [ - self._cpu_type( - cpu_id=system.numCpus() + idx, clk_domain=self.clk_domain - ) - for idx in range(num_cpus) - ] + self.generate_cpus(cpu_type, num_cpus) - for cpu in self.cpus: - cpu.createThreads() - cpu.createInterruptController() - cpu.socket_id = system.numCpuClusters() - system.addCpuCluster(self, num_cpus) - - def requireCaches(self): - return self._cpu_type.require_caches() - - def memoryMode(self): - return self._cpu_type.memory_mode() + system.addCpuCluster(self) def addL1(self): for cpu in self.cpus: @@ -191,7 +176,7 @@ class CpuCluster(SubSystem): cpu.connectCachedPorts(bus.cpu_side_ports) -class AtomicCluster(CpuCluster): +class AtomicCluster(ArmCpuCluster): def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"): cpu_config = [ ObjectList.cpu_list.get("AtomicSimpleCPU"), @@ -199,28 +184,24 @@ class AtomicCluster(CpuCluster): None, None, ] - super(AtomicCluster, self).__init__( - system, num_cpus, cpu_clock, 
cpu_voltage, *cpu_config - ) + super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config) def addL1(self): pass -class KvmCluster(CpuCluster): +class KvmCluster(ArmCpuCluster): def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"): cpu_config = [ObjectList.cpu_list.get("ArmV8KvmCPU"), None, None, None] - super(KvmCluster, self).__init__( - system, num_cpus, cpu_clock, cpu_voltage, *cpu_config - ) + super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config) def addL1(self): pass -class FastmodelCluster(SubSystem): +class FastmodelCluster(CpuCluster): def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"): - super(FastmodelCluster, self).__init__() + super().__init__() # Setup GIC gic = system.realview.gic @@ -285,12 +266,12 @@ class FastmodelCluster(SubSystem): self.cpu_hub.a2t = a2t self.cpu_hub.t2g = t2g - system.addCpuCluster(self, num_cpus) + system.addCpuCluster(self) - def requireCaches(self): + def require_caches(self): return False - def memoryMode(self): + def memory_mode(self): return "atomic_noncaching" def addL1(self): @@ -330,7 +311,6 @@ class BaseSimpleSystem(ArmSystem): self.mem_ranges = self.getMemRanges(int(Addr(mem_size))) self._clusters = [] - self._num_cpus = 0 def getMemRanges(self, mem_size): """ @@ -357,14 +337,8 @@ class BaseSimpleSystem(ArmSystem): def numCpuClusters(self): return len(self._clusters) - def addCpuCluster(self, cpu_cluster, num_cpus): - assert cpu_cluster not in self._clusters - assert num_cpus > 0 + def addCpuCluster(self, cpu_cluster): self._clusters.append(cpu_cluster) - self._num_cpus += num_cpus - - def numCpus(self): - return self._num_cpus def addCaches(self, need_caches, last_cache_level): if not need_caches: diff --git a/configs/example/arm/fs_bigLITTLE.py b/configs/example/arm/fs_bigLITTLE.py index c188de663a..060c51ec3c 100644 --- a/configs/example/arm/fs_bigLITTLE.py +++ b/configs/example/arm/fs_bigLITTLE.py @@ -79,7 +79,7 @@ def _using_pdes(root): return False 
-class BigCluster(devices.CpuCluster): +class BigCluster(devices.ArmCpuCluster): def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"): cpu_config = [ ObjectList.cpu_list.get("O3_ARM_v7a_3"), @@ -87,12 +87,10 @@ class BigCluster(devices.CpuCluster): devices.L1D, devices.L2, ] - super(BigCluster, self).__init__( - system, num_cpus, cpu_clock, cpu_voltage, *cpu_config - ) + super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config) -class LittleCluster(devices.CpuCluster): +class LittleCluster(devices.ArmCpuCluster): def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"): cpu_config = [ ObjectList.cpu_list.get("MinorCPU"), @@ -100,9 +98,7 @@ class LittleCluster(devices.CpuCluster): devices.L1D, devices.L2, ] - super(LittleCluster, self).__init__( - system, num_cpus, cpu_clock, cpu_voltage, *cpu_config - ) + super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config) class Ex5BigCluster(devices.CpuCluster): @@ -113,9 +109,7 @@ class Ex5BigCluster(devices.CpuCluster): ex5_big.L1D, ex5_big.L2, ] - super(Ex5BigCluster, self).__init__( - system, num_cpus, cpu_clock, cpu_voltage, *cpu_config - ) + super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config) class Ex5LittleCluster(devices.CpuCluster): @@ -126,9 +120,7 @@ class Ex5LittleCluster(devices.CpuCluster): ex5_LITTLE.L1D, ex5_LITTLE.L2, ] - super(Ex5LittleCluster, self).__init__( - system, num_cpus, cpu_clock, cpu_voltage, *cpu_config - ) + super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config) def createSystem( @@ -376,7 +368,7 @@ def build(options): system.bigCluster = big_model( system, options.big_cpus, options.big_cpu_clock ) - system.mem_mode = system.bigCluster.memoryMode() + system.mem_mode = system.bigCluster.memory_mode() all_cpus += system.bigCluster.cpus # little cluster @@ -384,23 +376,24 @@ def build(options): system.littleCluster = little_model( system, options.little_cpus, options.little_cpu_clock ) - system.mem_mode = 
system.littleCluster.memoryMode() + system.mem_mode = system.littleCluster.memory_mode() all_cpus += system.littleCluster.cpus # Figure out the memory mode if ( options.big_cpus > 0 and options.little_cpus > 0 - and system.bigCluster.memoryMode() != system.littleCluster.memoryMode() + and system.bigCluster.memory_mode() + != system.littleCluster.memory_mode() ): m5.util.panic("Memory mode missmatch among CPU clusters") # create caches system.addCaches(options.caches, options.last_cache_level) if not options.caches: - if options.big_cpus > 0 and system.bigCluster.requireCaches(): + if options.big_cpus > 0 and system.bigCluster.require_caches(): m5.util.panic("Big CPU model requires caches") - if options.little_cpus > 0 and system.littleCluster.requireCaches(): + if options.little_cpus > 0 and system.littleCluster.require_caches(): m5.util.panic("Little CPU model requires caches") # Create a KVM VM and do KVM-specific configuration diff --git a/configs/example/arm/ruby_fs.py b/configs/example/arm/ruby_fs.py index d58184522c..fd36319363 100644 --- a/configs/example/arm/ruby_fs.py +++ b/configs/example/arm/ruby_fs.py @@ -115,7 +115,7 @@ def create(args): # Add CPU clusters to the system system.cpu_cluster = [ - devices.CpuCluster( + devices.ArmCpuCluster( system, args.num_cpus, args.cpu_freq, diff --git a/configs/example/arm/starter_fs.py b/configs/example/arm/starter_fs.py index 3a9a8762d6..7d7ab71768 100644 --- a/configs/example/arm/starter_fs.py +++ b/configs/example/arm/starter_fs.py @@ -128,7 +128,7 @@ def create(args): # Add CPU clusters to the system system.cpu_cluster = [ - devices.CpuCluster( + devices.ArmCpuCluster( system, args.num_cores, args.cpu_freq, "1.0V", *cpu_types[args.cpu] ) ] diff --git a/configs/example/arm/starter_se.py b/configs/example/arm/starter_se.py index 08c3d74fbd..ccdbe4f847 100644 --- a/configs/example/arm/starter_se.py +++ b/configs/example/arm/starter_se.py @@ -102,14 +102,14 @@ class SimpleSeSystem(System): # Create a cache hierarchy 
(unless we are simulating a # functional CPU in atomic memory mode) for the CPU cluster # and connect it to the shared memory bus. - if self.cpu_cluster.memoryMode() == "timing": + if self.cpu_cluster.memory_mode() == "timing": self.cpu_cluster.addL1() self.cpu_cluster.addL2(self.cpu_cluster.clk_domain) self.cpu_cluster.connectMemSide(self.membus) # Tell gem5 about the memory mode used by the CPUs we are # simulating. - self.mem_mode = self.cpu_cluster.memoryMode() + self.mem_mode = self.cpu_cluster.memory_mode() def numCpuClusters(self): return len(self._clusters) From 76b74fa51f0c691dbb3ea4c5272dac8add8913cb Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Fri, 13 Jan 2023 16:26:03 +0000 Subject: [PATCH 115/492] util: use origin/develop as default upstream branch The master branch is not in use anymore and it has been renamed to develop instead Change-Id: Ib9ea6e137f1b9284fb8147268b8691d002d3f90a Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67331 Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- util/maint/list_changes.py | 2 +- util/maint/show_changes_by_file.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/util/maint/list_changes.py b/util/maint/list_changes.py index 465ae1abb0..87e4ea2d20 100755 --- a/util/maint/list_changes.py +++ b/util/maint/list_changes.py @@ -178,7 +178,7 @@ def _main(): "--upstream", "-u", type=str, - default="origin/master", + default="origin/develop", help="Upstream branch for comparison. Default: %(default)s", ) parser.add_argument( diff --git a/util/maint/show_changes_by_file.py b/util/maint/show_changes_by_file.py index ea739f78fe..d5055c1ff4 100755 --- a/util/maint/show_changes_by_file.py +++ b/util/maint/show_changes_by_file.py @@ -94,7 +94,7 @@ def _main(): "--upstream", "-u", type=str, - default="origin/master", + default="origin/develop", help="Upstream branch for comparison. 
Default: %(default)s", ) parser.add_argument( From f7857867ae54fc868e265d1aa2ea171b413c1776 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 12 Jan 2023 01:05:32 -0800 Subject: [PATCH 116/492] fastmodel: Export the "reset_in" reset signal from the PL330. This is essentially the same as how the reset signals were exported from the CortexR52 which I used as an example, except here there is only one reset. I passed through with the same name rather than calling it "model_reset" as in the CortexR52 since the pass through is trivial, and renaming the signal with no additional functionality seemed like it would just create confusion. In the CortexR52 case it makes more sense since there are multiple reset lines that need to be toggled to actually cause a reset, and a level of abstraction is actually helpful. Change-Id: I6b61fed6eb1566d131d4b0367fe4ae65031b25f8 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67351 Maintainer: Gabe Black Reviewed-by: Yu-hsin Wang Tested-by: kokoro --- src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py | 3 +++ src/arch/arm/fastmodel/PL330_DMAC/PL330.lisa | 5 +++++ src/arch/arm/fastmodel/PL330_DMAC/pl330.cc | 8 +++++++- src/arch/arm/fastmodel/PL330_DMAC/pl330.hh | 3 +++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py b/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py index ad43fed237..21ead525d3 100644 --- a/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py +++ b/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py @@ -26,6 +26,7 @@ from m5.params import * from m5.objects.FastModel import AmbaInitiatorSocket, AmbaTargetSocket from m5.objects.IntPin import IntSourcePin +from m5.objects.ResetPort import ResetResponsePort from m5.objects.SystemC import SystemC_ScModule @@ -197,6 +198,8 @@ class FastModelPL330(SystemC_ScModule): pio_s = AmbaTargetSocket(64, "Register accesses (secure)") pio_ns = AmbaTargetSocket(64, "Register accesses (non-secure)") + 
reset_in = ResetResponsePort("System reset") + # irq_abort_master_port # irq_master_port # pvbus_m diff --git a/src/arch/arm/fastmodel/PL330_DMAC/PL330.lisa b/src/arch/arm/fastmodel/PL330_DMAC/PL330.lisa index 3c31c90d87..d57dfdad3d 100644 --- a/src/arch/arm/fastmodel/PL330_DMAC/PL330.lisa +++ b/src/arch/arm/fastmodel/PL330_DMAC/PL330.lisa @@ -64,6 +64,9 @@ component PL330 // Interrupts. pl330.irq_master_port => self.irq; pl330.irq_abort_master_port => self.irq_abort; + + // Reset signals. + self.reset_in => pl330.reset_in; } properties @@ -85,4 +88,6 @@ component PL330 master port irq[32]; master port irq_abort; + + slave port reset_in; } diff --git a/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc b/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc index e582404c8c..13162bd409 100644 --- a/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc +++ b/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc @@ -45,7 +45,8 @@ PL330::PL330(const FastModelPL330Params &params, dma(amba_m, params.name + ".dma", -1), pioS(amba_s, params.name + ".pio_s", -1), pioNs(amba_s_ns, params.name + ".pio_ns", -1), - irqAbortReceiver("irq_abort_receiver") + irqAbortReceiver("irq_abort_receiver"), + resetIn("reset_in", 0) { set_parameter("pl330.fifo_size", params.fifo_size); set_parameter("pl330.max_transfer", params.max_transfer); @@ -211,6 +212,9 @@ PL330::PL330(const FastModelPL330Params &params, // And install it. irqAbortReceiver.onChange(abort_change); + + // Plumb the reset signal. 
+ resetIn.signal_out.bind(this->reset_in); } void @@ -250,6 +254,8 @@ PL330::gem5_getPort(const std::string &if_name, int idx) } if (port != -1 && port < irqPort.size()) return *irqPort[port].at(idx); + } else if (if_name == "reset_in") { + return resetIn; } return scx_evs_PL330::gem5_getPort(if_name, idx); diff --git a/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh b/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh index 3af56f2e6e..389f7047c7 100644 --- a/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh +++ b/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh @@ -39,6 +39,7 @@ #include "arch/arm/fastmodel/amba_ports.hh" #include "arch/arm/fastmodel/common/signal_receiver.hh" +#include "arch/arm/fastmodel/common/signal_sender.hh" #include "arch/arm/fastmodel/protocol/exported_clock_rate_control.hh" #include "dev/intpin.hh" #include "params/FastModelPL330.hh" @@ -73,6 +74,8 @@ class PL330 : public scx_evs_PL330 void allocateIrq(int idx, int count); + SignalSender resetIn; + public: PL330(const FastModelPL330Params &params, sc_core::sc_module_name _name); PL330(const FastModelPL330Params &params) : From 6e74deb46f3f296107eb9bfbfe96f87d7d1940be Mon Sep 17 00:00:00 2001 From: Nathanael Premillieu Date: Tue, 13 Dec 2022 14:31:12 +0100 Subject: [PATCH 117/492] mem-cache: use MMU instead of TLB in prefetchers BaseMMU object is now the entry point for translation requests. In the prefetchers, a BaseTLB object is still used if translation is needed. This patch is changing it to a BaseMMU object. 
Change-Id: I47dc92d4bc4a5c4f7c4c6181f7b7e126db6bd529 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66831 Tested-by: kokoro Reviewed-by: Daniel Carvalho Maintainer: Daniel Carvalho --- src/mem/cache/prefetch/Prefetcher.py | 12 ++++++------ src/mem/cache/prefetch/base.cc | 8 ++++---- src/mem/cache/prefetch/base.hh | 10 +++++----- src/mem/cache/prefetch/queued.cc | 12 ++++++------ src/mem/cache/prefetch/queued.hh | 6 +++--- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py index 397711c09a..a350319258 100644 --- a/src/mem/cache/prefetch/Prefetcher.py +++ b/src/mem/cache/prefetch/Prefetcher.py @@ -64,7 +64,7 @@ class BasePrefetcher(ClockedObject): abstract = True cxx_class = "gem5::prefetch::Base" cxx_header = "mem/cache/prefetch/base.hh" - cxx_exports = [PyBindMethod("addEventProbe"), PyBindMethod("addTLB")] + cxx_exports = [PyBindMethod("addEventProbe"), PyBindMethod("addMMU")] sys = Param.System(Parent.any, "System this prefetcher belongs to") # Get the block size from the parent (system) @@ -93,7 +93,7 @@ class BasePrefetcher(ClockedObject): def __init__(self, **kwargs): super().__init__(**kwargs) self._events = [] - self._tlbs = [] + self._mmus = [] def addEvent(self, newObject): self._events.append(newObject) @@ -101,8 +101,8 @@ class BasePrefetcher(ClockedObject): # Override the normal SimObject::regProbeListeners method and # register deferred event handlers. 
def regProbeListeners(self): - for tlb in self._tlbs: - self.getCCObject().addTLB(tlb.getCCObject()) + for mmu in self._mmus: + self.getCCObject().addMMU(mmu.getCCObject()) for event in self._events: event.register() self.getCCObject().regProbeListeners() @@ -114,10 +114,10 @@ class BasePrefetcher(ClockedObject): raise TypeError("probeNames must have at least one element") self.addEvent(HWPProbeEvent(self, simObj, *probeNames)) - def registerTLB(self, simObj): + def registerMMU(self, simObj): if not isinstance(simObj, SimObject): raise TypeError("argument must be a SimObject type") - self._tlbs.append(simObj) + self._mmus.append(simObj) class MultiPrefetcher(BasePrefetcher): diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc index cb4c1e8118..9ff81fba68 100644 --- a/src/mem/cache/prefetch/base.cc +++ b/src/mem/cache/prefetch/base.cc @@ -103,7 +103,7 @@ Base::Base(const BasePrefetcherParams &p) prefetchOnPfHit(p.prefetch_on_pf_hit), useVirtualAddresses(p.use_virtual_addresses), prefetchStats(this), issuedPrefetches(0), - usefulPrefetches(0), tlb(nullptr) + usefulPrefetches(0), mmu(nullptr) { } @@ -299,10 +299,10 @@ Base::addEventProbe(SimObject *obj, const char *name) } void -Base::addTLB(BaseTLB *t) +Base::addMMU(BaseMMU *m) { - fatal_if(tlb != nullptr, "Only one TLB can be registered"); - tlb = t; + fatal_if(mmu != nullptr, "Only one MMU can be registered"); + mmu = m; } } // namespace prefetch diff --git a/src/mem/cache/prefetch/base.hh b/src/mem/cache/prefetch/base.hh index f2a8207d35..e5e43e534f 100644 --- a/src/mem/cache/prefetch/base.hh +++ b/src/mem/cache/prefetch/base.hh @@ -364,8 +364,8 @@ class Base : public ClockedObject /** Total prefetches that has been useful */ uint64_t usefulPrefetches; - /** Registered tlb for address translations */ - BaseTLB * tlb; + /** Registered mmu for address translations */ + BaseMMU * mmu; public: Base(const BasePrefetcherParams &p); @@ -437,12 +437,12 @@ class Base : public ClockedObject void 
addEventProbe(SimObject *obj, const char *name); /** - * Add a BaseTLB object to be used whenever a translation is needed. + * Add a BaseMMU object to be used whenever a translation is needed. * This is generally required when the prefetcher is allowed to generate * page crossing references and/or uses virtual addresses for training. - * @param tlb pointer to the BaseTLB object to add + * @param mmu pointer to the BaseMMU object to add */ - void addTLB(BaseTLB *tlb); + void addMMU(BaseMMU *mmu); }; } // namespace prefetch diff --git a/src/mem/cache/prefetch/queued.cc b/src/mem/cache/prefetch/queued.cc index da9cbf479e..b85a227f00 100644 --- a/src/mem/cache/prefetch/queued.cc +++ b/src/mem/cache/prefetch/queued.cc @@ -78,13 +78,13 @@ Queued::DeferredPacket::createPkt(Addr paddr, unsigned blk_size, } void -Queued::DeferredPacket::startTranslation(BaseTLB *tlb) +Queued::DeferredPacket::startTranslation(BaseMMU *mmu) { assert(translationRequest != nullptr); if (!ongoingTranslation) { ongoingTranslation = true; // Prefetchers only operate in Timing mode - tlb->translateTiming(translationRequest, tc, this, BaseMMU::Read); + mmu->translateTiming(translationRequest, tc, this, BaseMMU::Read); } } @@ -216,7 +216,7 @@ Queued::notify(const PacketPtr &pkt, const PrefetchInfo &pfi) } } - bool can_cross_page = (tlb != nullptr); + bool can_cross_page = (mmu != nullptr); if (can_cross_page || samePage(addr_prio.first, pfi.getAddr())) { PrefetchInfo new_pfi(pfi,addr_prio.first); statsQueued.pfIdentified++; @@ -293,7 +293,7 @@ Queued::processMissingTranslations(unsigned max) // Increase the iterator first because dp.startTranslation can end up // calling finishTranslation, which will erase "it" it++; - dp.startTranslation(tlb); + dp.startTranslation(mmu); count += 1; } } @@ -311,7 +311,7 @@ Queued::translationComplete(DeferredPacket *dp, bool failed) assert(it != pfqMissingTranslation.end()); if (!failed) { DPRINTF(HWPrefetch, "%s Translation of vaddr %#x succeeded: " - "paddr %#x 
\n", tlb->name(), + "paddr %#x \n", mmu->name(), it->translationRequest->getVaddr(), it->translationRequest->getPaddr()); Addr target_paddr = it->translationRequest->getPaddr(); @@ -329,7 +329,7 @@ Queued::translationComplete(DeferredPacket *dp, bool failed) } } else { DPRINTF(HWPrefetch, "%s Translation of vaddr %#x failed, dropping " - "prefetch request %#x \n", tlb->name(), + "prefetch request %#x \n", mmu->name(), it->translationRequest->getVaddr()); } pfqMissingTranslation.erase(it); diff --git a/src/mem/cache/prefetch/queued.hh b/src/mem/cache/prefetch/queued.hh index c769b3875a..87d3456def 100644 --- a/src/mem/cache/prefetch/queued.hh +++ b/src/mem/cache/prefetch/queued.hh @@ -134,10 +134,10 @@ class Queued : public Base ThreadContext *tc, BaseMMU::Mode mode) override; /** - * Issues the translation request to the provided TLB - * @param tlb the tlb that has to translate the address + * Issues the translation request to the provided MMU + * @param mmu the mmu that has to translate the address */ - void startTranslation(BaseTLB *tlb); + void startTranslation(BaseMMU *mmu); }; std::list pfq; From d4c1904ce63082c30c01f2acebca7097e7eb612e Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 09:35:26 -0300 Subject: [PATCH 118/492] mem-cache: Remove the ReplacementPolicy namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: If4904706b897999e9200b163d47679519f01e4d4 Signed-off-by: Daniel R. 
Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67352 Maintainer: Jason Lowe-Power Reviewed-by: Richard Cooper Tested-by: kokoro --- src/mem/cache/prefetch/stride.hh | 1 - src/mem/cache/replacement_policies/base.hh | 1 - src/mem/cache/replacement_policies/bip_rp.cc | 1 - src/mem/cache/replacement_policies/bip_rp.hh | 1 - src/mem/cache/replacement_policies/brrip_rp.cc | 1 - src/mem/cache/replacement_policies/brrip_rp.hh | 1 - src/mem/cache/replacement_policies/dueling_rp.hh | 1 - src/mem/cache/replacement_policies/fifo_rp.cc | 3 ++- src/mem/cache/replacement_policies/fifo_rp.hh | 1 - src/mem/cache/replacement_policies/lfu_rp.cc | 1 - src/mem/cache/replacement_policies/lfu_rp.hh | 1 - src/mem/cache/replacement_policies/lru_rp.cc | 1 - src/mem/cache/replacement_policies/lru_rp.hh | 1 - src/mem/cache/replacement_policies/mru_rp.cc | 1 - src/mem/cache/replacement_policies/mru_rp.hh | 1 - src/mem/cache/replacement_policies/random_rp.cc | 1 - src/mem/cache/replacement_policies/random_rp.hh | 1 - src/mem/cache/replacement_policies/replaceable_entry.hh | 1 - src/mem/cache/replacement_policies/second_chance_rp.cc | 1 - src/mem/cache/replacement_policies/second_chance_rp.hh | 1 - src/mem/cache/replacement_policies/ship_rp.hh | 1 - src/mem/cache/replacement_policies/tree_plru_rp.cc | 1 - src/mem/cache/replacement_policies/tree_plru_rp.hh | 1 - src/mem/cache/replacement_policies/weighted_lru_rp.cc | 1 - src/mem/cache/replacement_policies/weighted_lru_rp.hh | 1 - src/mem/cache/tags/sector_tags.hh | 1 - 26 files changed, 2 insertions(+), 26 deletions(-) diff --git a/src/mem/cache/prefetch/stride.hh b/src/mem/cache/prefetch/stride.hh index 2b70765ba4..27fa917436 100644 --- a/src/mem/cache/prefetch/stride.hh +++ b/src/mem/cache/prefetch/stride.hh @@ -64,7 +64,6 @@ namespace gem5 { class BaseIndexingPolicy; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { class Base; diff --git 
a/src/mem/cache/replacement_policies/base.hh b/src/mem/cache/replacement_policies/base.hh index fc92ecb6ae..2c23c950b2 100644 --- a/src/mem/cache/replacement_policies/base.hh +++ b/src/mem/cache/replacement_policies/base.hh @@ -45,7 +45,6 @@ namespace gem5 */ typedef std::vector ReplacementCandidates; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/bip_rp.cc b/src/mem/cache/replacement_policies/bip_rp.cc index 102037ddfa..812c36bb71 100644 --- a/src/mem/cache/replacement_policies/bip_rp.cc +++ b/src/mem/cache/replacement_policies/bip_rp.cc @@ -37,7 +37,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/bip_rp.hh b/src/mem/cache/replacement_policies/bip_rp.hh index 486f4597dd..0b830e0b79 100644 --- a/src/mem/cache/replacement_policies/bip_rp.hh +++ b/src/mem/cache/replacement_policies/bip_rp.hh @@ -49,7 +49,6 @@ namespace gem5 struct BIPRPParams; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/brrip_rp.cc b/src/mem/cache/replacement_policies/brrip_rp.cc index a28ad339d5..06dad0d9fb 100644 --- a/src/mem/cache/replacement_policies/brrip_rp.cc +++ b/src/mem/cache/replacement_policies/brrip_rp.cc @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/brrip_rp.hh b/src/mem/cache/replacement_policies/brrip_rp.hh index f4f815e056..5649a64070 100644 --- a/src/mem/cache/replacement_policies/brrip_rp.hh +++ b/src/mem/cache/replacement_policies/brrip_rp.hh @@ -60,7 +60,6 @@ namespace gem5 struct BRRIPRPParams; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git 
a/src/mem/cache/replacement_policies/dueling_rp.hh b/src/mem/cache/replacement_policies/dueling_rp.hh index a4510508ef..c7400b4972 100644 --- a/src/mem/cache/replacement_policies/dueling_rp.hh +++ b/src/mem/cache/replacement_policies/dueling_rp.hh @@ -41,7 +41,6 @@ namespace gem5 struct DuelingRPParams; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/fifo_rp.cc b/src/mem/cache/replacement_policies/fifo_rp.cc index bc0680bc8a..199ba0a429 100644 --- a/src/mem/cache/replacement_policies/fifo_rp.cc +++ b/src/mem/cache/replacement_policies/fifo_rp.cc @@ -36,9 +36,10 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); + namespace replacement_policy { + FIFO::FIFO(const Params &p) : Base(p) { diff --git a/src/mem/cache/replacement_policies/fifo_rp.hh b/src/mem/cache/replacement_policies/fifo_rp.hh index 4b62fd220a..255666865d 100644 --- a/src/mem/cache/replacement_policies/fifo_rp.hh +++ b/src/mem/cache/replacement_policies/fifo_rp.hh @@ -44,7 +44,6 @@ namespace gem5 struct FIFORPParams; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/lfu_rp.cc b/src/mem/cache/replacement_policies/lfu_rp.cc index a715f7d0dc..fc3495465e 100644 --- a/src/mem/cache/replacement_policies/lfu_rp.cc +++ b/src/mem/cache/replacement_policies/lfu_rp.cc @@ -36,7 +36,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/lfu_rp.hh b/src/mem/cache/replacement_policies/lfu_rp.hh index aa058c46bd..58c057ce35 100644 --- a/src/mem/cache/replacement_policies/lfu_rp.hh +++ b/src/mem/cache/replacement_policies/lfu_rp.hh @@ -44,7 +44,6 @@ namespace gem5 struct LFURPParams; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { 
diff --git a/src/mem/cache/replacement_policies/lru_rp.cc b/src/mem/cache/replacement_policies/lru_rp.cc index c22f3fe2ba..cbec50eb73 100644 --- a/src/mem/cache/replacement_policies/lru_rp.cc +++ b/src/mem/cache/replacement_policies/lru_rp.cc @@ -37,7 +37,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/lru_rp.hh b/src/mem/cache/replacement_policies/lru_rp.hh index 620117dca5..6feaa4f73d 100644 --- a/src/mem/cache/replacement_policies/lru_rp.hh +++ b/src/mem/cache/replacement_policies/lru_rp.hh @@ -42,7 +42,6 @@ namespace gem5 struct LRURPParams; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/mru_rp.cc b/src/mem/cache/replacement_policies/mru_rp.cc index 18b0d65e89..5040c22206 100644 --- a/src/mem/cache/replacement_policies/mru_rp.cc +++ b/src/mem/cache/replacement_policies/mru_rp.cc @@ -37,7 +37,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/mru_rp.hh b/src/mem/cache/replacement_policies/mru_rp.hh index 1657ace3d5..5b5f0bf03d 100644 --- a/src/mem/cache/replacement_policies/mru_rp.hh +++ b/src/mem/cache/replacement_policies/mru_rp.hh @@ -44,7 +44,6 @@ namespace gem5 struct MRURPParams; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/random_rp.cc b/src/mem/cache/replacement_policies/random_rp.cc index fc6c431b9d..8711c85ba3 100644 --- a/src/mem/cache/replacement_policies/random_rp.cc +++ b/src/mem/cache/replacement_policies/random_rp.cc @@ -37,7 +37,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/random_rp.hh 
b/src/mem/cache/replacement_policies/random_rp.hh index 9c383d5418..a2b384563c 100644 --- a/src/mem/cache/replacement_policies/random_rp.hh +++ b/src/mem/cache/replacement_policies/random_rp.hh @@ -42,7 +42,6 @@ namespace gem5 struct RandomRPParams; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/replaceable_entry.hh b/src/mem/cache/replacement_policies/replaceable_entry.hh index 6c56bca394..bb88cefd1d 100644 --- a/src/mem/cache/replacement_policies/replaceable_entry.hh +++ b/src/mem/cache/replacement_policies/replaceable_entry.hh @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/second_chance_rp.cc b/src/mem/cache/replacement_policies/second_chance_rp.cc index df506c638b..963052ee7f 100644 --- a/src/mem/cache/replacement_policies/second_chance_rp.cc +++ b/src/mem/cache/replacement_policies/second_chance_rp.cc @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/second_chance_rp.hh b/src/mem/cache/replacement_policies/second_chance_rp.hh index 4d0a36cdeb..79085d1d53 100644 --- a/src/mem/cache/replacement_policies/second_chance_rp.hh +++ b/src/mem/cache/replacement_policies/second_chance_rp.hh @@ -46,7 +46,6 @@ namespace gem5 struct SecondChanceRPParams; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/ship_rp.hh b/src/mem/cache/replacement_policies/ship_rp.hh index fa27540adb..edf4ff5a45 100644 --- a/src/mem/cache/replacement_policies/ship_rp.hh +++ b/src/mem/cache/replacement_policies/ship_rp.hh @@ -51,7 +51,6 @@ struct SHiPRPParams; struct SHiPMemRPParams; struct SHiPPCRPParams; 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/tree_plru_rp.cc b/src/mem/cache/replacement_policies/tree_plru_rp.cc index 2ee987c959..5014785093 100644 --- a/src/mem/cache/replacement_policies/tree_plru_rp.cc +++ b/src/mem/cache/replacement_policies/tree_plru_rp.cc @@ -43,7 +43,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/tree_plru_rp.hh b/src/mem/cache/replacement_policies/tree_plru_rp.hh index 335670457c..1f7e91c33e 100644 --- a/src/mem/cache/replacement_policies/tree_plru_rp.hh +++ b/src/mem/cache/replacement_policies/tree_plru_rp.hh @@ -80,7 +80,6 @@ namespace gem5 struct TreePLRURPParams; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/weighted_lru_rp.cc b/src/mem/cache/replacement_policies/weighted_lru_rp.cc index ed6a7f6166..ac8fd1015b 100644 --- a/src/mem/cache/replacement_policies/weighted_lru_rp.cc +++ b/src/mem/cache/replacement_policies/weighted_lru_rp.cc @@ -39,7 +39,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/replacement_policies/weighted_lru_rp.hh b/src/mem/cache/replacement_policies/weighted_lru_rp.hh index bc0e5735af..117b73b10f 100644 --- a/src/mem/cache/replacement_policies/weighted_lru_rp.hh +++ b/src/mem/cache/replacement_policies/weighted_lru_rp.hh @@ -42,7 +42,6 @@ namespace gem5 struct WeightedLRURPParams; -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { diff --git a/src/mem/cache/tags/sector_tags.hh b/src/mem/cache/tags/sector_tags.hh index c64621213d..bad132158c 100644 --- a/src/mem/cache/tags/sector_tags.hh +++ b/src/mem/cache/tags/sector_tags.hh @@ -47,7 +47,6 @@ namespace 
gem5 { -GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy); namespace replacement_policy { class Base; From 65c15ba18884492888daee4e33f93e017566da02 Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 09:41:36 -0300 Subject: [PATCH 119/492] mem-cache: Remove the Prefetcher namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: I31953be7ce8566576de94c9296eeeec601c9906a Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67353 Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/mem/cache/base.hh | 1 - src/mem/cache/prefetch/access_map_pattern_matching.cc | 1 - src/mem/cache/prefetch/access_map_pattern_matching.hh | 1 - src/mem/cache/prefetch/base.cc | 1 - src/mem/cache/prefetch/base.hh | 1 - src/mem/cache/prefetch/bop.cc | 1 - src/mem/cache/prefetch/bop.hh | 1 - src/mem/cache/prefetch/delta_correlating_prediction_tables.cc | 1 - src/mem/cache/prefetch/delta_correlating_prediction_tables.hh | 1 - src/mem/cache/prefetch/indirect_memory.cc | 1 - src/mem/cache/prefetch/indirect_memory.hh | 1 - src/mem/cache/prefetch/irregular_stream_buffer.cc | 1 - src/mem/cache/prefetch/irregular_stream_buffer.hh | 1 - src/mem/cache/prefetch/multi.cc | 1 - src/mem/cache/prefetch/multi.hh | 1 - src/mem/cache/prefetch/pif.cc | 1 - src/mem/cache/prefetch/pif.hh | 1 - src/mem/cache/prefetch/queued.cc | 1 - src/mem/cache/prefetch/queued.hh | 1 - src/mem/cache/prefetch/sbooe.cc | 1 - src/mem/cache/prefetch/sbooe.hh | 1 - src/mem/cache/prefetch/signature_path.cc | 1 - src/mem/cache/prefetch/signature_path.hh | 1 - src/mem/cache/prefetch/signature_path_v2.cc | 1 - src/mem/cache/prefetch/signature_path_v2.hh | 1 - src/mem/cache/prefetch/slim_ampm.cc | 1 - src/mem/cache/prefetch/slim_ampm.hh | 1 - src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc | 1 - src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh | 1 - 
src/mem/cache/prefetch/stride.cc | 1 - src/mem/cache/prefetch/stride.hh | 1 - src/mem/cache/prefetch/tagged.cc | 1 - src/mem/cache/prefetch/tagged.hh | 1 - 33 files changed, 33 deletions(-) diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index 6fc76282e9..78571ceb3c 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -79,7 +79,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { class Base; diff --git a/src/mem/cache/prefetch/access_map_pattern_matching.cc b/src/mem/cache/prefetch/access_map_pattern_matching.cc index 6bf5d9bca8..989f3c6be1 100644 --- a/src/mem/cache/prefetch/access_map_pattern_matching.cc +++ b/src/mem/cache/prefetch/access_map_pattern_matching.cc @@ -36,7 +36,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/access_map_pattern_matching.hh b/src/mem/cache/prefetch/access_map_pattern_matching.hh index 3b0bc28f4d..893d30dec2 100644 --- a/src/mem/cache/prefetch/access_map_pattern_matching.hh +++ b/src/mem/cache/prefetch/access_map_pattern_matching.hh @@ -49,7 +49,6 @@ namespace gem5 struct AccessMapPatternMatchingParams; struct AMPMPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc index 9ff81fba68..e3e4b24cf2 100644 --- a/src/mem/cache/prefetch/base.cc +++ b/src/mem/cache/prefetch/base.cc @@ -55,7 +55,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/base.hh b/src/mem/cache/prefetch/base.hh index e5e43e534f..6bae73519c 100644 --- a/src/mem/cache/prefetch/base.hh +++ b/src/mem/cache/prefetch/base.hh @@ -65,7 +65,6 @@ namespace gem5 class BaseCache; struct BasePrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/bop.cc b/src/mem/cache/prefetch/bop.cc 
index a60c1fe95e..ce2502bee6 100644 --- a/src/mem/cache/prefetch/bop.cc +++ b/src/mem/cache/prefetch/bop.cc @@ -34,7 +34,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/bop.hh b/src/mem/cache/prefetch/bop.hh index 7fdba2bbf2..bb1b05dfa9 100644 --- a/src/mem/cache/prefetch/bop.hh +++ b/src/mem/cache/prefetch/bop.hh @@ -46,7 +46,6 @@ namespace gem5 struct BOPPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/delta_correlating_prediction_tables.cc b/src/mem/cache/prefetch/delta_correlating_prediction_tables.cc index c5e126c4c0..b59394ce25 100644 --- a/src/mem/cache/prefetch/delta_correlating_prediction_tables.cc +++ b/src/mem/cache/prefetch/delta_correlating_prediction_tables.cc @@ -36,7 +36,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/delta_correlating_prediction_tables.hh b/src/mem/cache/prefetch/delta_correlating_prediction_tables.hh index 8ad21a6691..0218e9138a 100644 --- a/src/mem/cache/prefetch/delta_correlating_prediction_tables.hh +++ b/src/mem/cache/prefetch/delta_correlating_prediction_tables.hh @@ -39,7 +39,6 @@ namespace gem5 struct DeltaCorrelatingPredictionTablesParams; struct DCPTPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/indirect_memory.cc b/src/mem/cache/prefetch/indirect_memory.cc index 7bb1545f7f..ab84ce25a2 100644 --- a/src/mem/cache/prefetch/indirect_memory.cc +++ b/src/mem/cache/prefetch/indirect_memory.cc @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/indirect_memory.hh b/src/mem/cache/prefetch/indirect_memory.hh index 85fb50e5a7..da3e894cfa 100644 --- a/src/mem/cache/prefetch/indirect_memory.hh +++ 
b/src/mem/cache/prefetch/indirect_memory.hh @@ -50,7 +50,6 @@ namespace gem5 struct IndirectMemoryPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/irregular_stream_buffer.cc b/src/mem/cache/prefetch/irregular_stream_buffer.cc index fc0d71faa3..ce30b41aa6 100644 --- a/src/mem/cache/prefetch/irregular_stream_buffer.cc +++ b/src/mem/cache/prefetch/irregular_stream_buffer.cc @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/irregular_stream_buffer.hh b/src/mem/cache/prefetch/irregular_stream_buffer.hh index 20dadd60a2..39373010bb 100644 --- a/src/mem/cache/prefetch/irregular_stream_buffer.hh +++ b/src/mem/cache/prefetch/irregular_stream_buffer.hh @@ -48,7 +48,6 @@ namespace gem5 struct IrregularStreamBufferPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/multi.cc b/src/mem/cache/prefetch/multi.cc index ddf0e30d59..1f7298f354 100644 --- a/src/mem/cache/prefetch/multi.cc +++ b/src/mem/cache/prefetch/multi.cc @@ -42,7 +42,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/multi.hh b/src/mem/cache/prefetch/multi.hh index ff17918346..7890f090b5 100644 --- a/src/mem/cache/prefetch/multi.hh +++ b/src/mem/cache/prefetch/multi.hh @@ -47,7 +47,6 @@ namespace gem5 struct MultiPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/pif.cc b/src/mem/cache/prefetch/pif.cc index 95b9f4f60f..79e8e6d747 100644 --- a/src/mem/cache/prefetch/pif.cc +++ b/src/mem/cache/prefetch/pif.cc @@ -37,7 +37,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/pif.hh b/src/mem/cache/prefetch/pif.hh index e48d8fbc52..296087e8e0 100644 --- 
a/src/mem/cache/prefetch/pif.hh +++ b/src/mem/cache/prefetch/pif.hh @@ -49,7 +49,6 @@ namespace gem5 struct PIFPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/queued.cc b/src/mem/cache/prefetch/queued.cc index b85a227f00..1ab34d2e9b 100644 --- a/src/mem/cache/prefetch/queued.cc +++ b/src/mem/cache/prefetch/queued.cc @@ -51,7 +51,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/queued.hh b/src/mem/cache/prefetch/queued.hh index 87d3456def..1d1a3faef4 100644 --- a/src/mem/cache/prefetch/queued.hh +++ b/src/mem/cache/prefetch/queued.hh @@ -53,7 +53,6 @@ namespace gem5 struct QueuedPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/sbooe.cc b/src/mem/cache/prefetch/sbooe.cc index a3f023126d..44a10c232d 100644 --- a/src/mem/cache/prefetch/sbooe.cc +++ b/src/mem/cache/prefetch/sbooe.cc @@ -34,7 +34,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/sbooe.hh b/src/mem/cache/prefetch/sbooe.hh index 9b25816b23..7914b88f45 100644 --- a/src/mem/cache/prefetch/sbooe.hh +++ b/src/mem/cache/prefetch/sbooe.hh @@ -47,7 +47,6 @@ namespace gem5 struct SBOOEPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/signature_path.cc b/src/mem/cache/prefetch/signature_path.cc index 2f9477b703..a36ef809ce 100644 --- a/src/mem/cache/prefetch/signature_path.cc +++ b/src/mem/cache/prefetch/signature_path.cc @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/signature_path.hh b/src/mem/cache/prefetch/signature_path.hh index 9cffa33d9a..9613fe0886 100644 --- a/src/mem/cache/prefetch/signature_path.hh +++ 
b/src/mem/cache/prefetch/signature_path.hh @@ -50,7 +50,6 @@ namespace gem5 struct SignaturePathPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/signature_path_v2.cc b/src/mem/cache/prefetch/signature_path_v2.cc index 230bc76256..b50721ca69 100644 --- a/src/mem/cache/prefetch/signature_path_v2.cc +++ b/src/mem/cache/prefetch/signature_path_v2.cc @@ -37,7 +37,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/signature_path_v2.hh b/src/mem/cache/prefetch/signature_path_v2.hh index b7f745cd94..417b7ec540 100644 --- a/src/mem/cache/prefetch/signature_path_v2.hh +++ b/src/mem/cache/prefetch/signature_path_v2.hh @@ -50,7 +50,6 @@ namespace gem5 struct SignaturePathPrefetcherV2Params; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/slim_ampm.cc b/src/mem/cache/prefetch/slim_ampm.cc index 85f89663ca..950994a4bd 100644 --- a/src/mem/cache/prefetch/slim_ampm.cc +++ b/src/mem/cache/prefetch/slim_ampm.cc @@ -33,7 +33,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/slim_ampm.hh b/src/mem/cache/prefetch/slim_ampm.hh index 4a07b9bf45..54f38d4885 100644 --- a/src/mem/cache/prefetch/slim_ampm.hh +++ b/src/mem/cache/prefetch/slim_ampm.hh @@ -48,7 +48,6 @@ namespace gem5 struct SlimAMPMPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc index 3c9b9eb64c..0e3211579c 100644 --- a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc +++ b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git 
a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh index bee746c5c7..cdd2788104 100644 --- a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh +++ b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh @@ -53,7 +53,6 @@ namespace gem5 struct STeMSPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/stride.cc b/src/mem/cache/prefetch/stride.cc index 1d375a6228..0a77b28a1c 100644 --- a/src/mem/cache/prefetch/stride.cc +++ b/src/mem/cache/prefetch/stride.cc @@ -60,7 +60,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/stride.hh b/src/mem/cache/prefetch/stride.hh index 27fa917436..7e55abea21 100644 --- a/src/mem/cache/prefetch/stride.hh +++ b/src/mem/cache/prefetch/stride.hh @@ -70,7 +70,6 @@ namespace replacement_policy } struct StridePrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/tagged.cc b/src/mem/cache/prefetch/tagged.cc index d385ac0611..0d4d79b006 100644 --- a/src/mem/cache/prefetch/tagged.cc +++ b/src/mem/cache/prefetch/tagged.cc @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { diff --git a/src/mem/cache/prefetch/tagged.hh b/src/mem/cache/prefetch/tagged.hh index d7f77a8841..5c91f654b1 100644 --- a/src/mem/cache/prefetch/tagged.hh +++ b/src/mem/cache/prefetch/tagged.hh @@ -42,7 +42,6 @@ namespace gem5 struct TaggedPrefetcherParams; -GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch); namespace prefetch { From de408fbd4e4cb0b957f45554daba94273218cb80 Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 09:48:54 -0300 Subject: [PATCH 120/492] mem-cache: Remove the Compressor namespace This namespace has gone through the deprecation period and can now be removed. 
Change-Id: Ibbcc8221ed6042d55f56a94bf499a4c1c564ea82 Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67354 Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Richard Cooper --- src/mem/cache/compressors/base.cc | 1 - src/mem/cache/compressors/base.hh | 1 - src/mem/cache/compressors/base_delta.cc | 1 - src/mem/cache/compressors/base_delta.hh | 1 - src/mem/cache/compressors/base_delta_impl.hh | 1 - src/mem/cache/compressors/base_dictionary_compressor.cc | 1 - src/mem/cache/compressors/cpack.cc | 1 - src/mem/cache/compressors/cpack.hh | 1 - src/mem/cache/compressors/dictionary_compressor.hh | 1 - src/mem/cache/compressors/dictionary_compressor_impl.hh | 1 - src/mem/cache/compressors/encoders/base.hh | 1 - src/mem/cache/compressors/encoders/huffman.cc | 1 - src/mem/cache/compressors/encoders/huffman.hh | 1 - src/mem/cache/compressors/fpc.cc | 1 - src/mem/cache/compressors/fpc.hh | 1 - src/mem/cache/compressors/fpcd.cc | 1 - src/mem/cache/compressors/fpcd.hh | 1 - src/mem/cache/compressors/frequent_values.cc | 1 - src/mem/cache/compressors/frequent_values.hh | 1 - src/mem/cache/compressors/multi.cc | 1 - src/mem/cache/compressors/multi.hh | 1 - src/mem/cache/compressors/perfect.cc | 1 - src/mem/cache/compressors/perfect.hh | 1 - src/mem/cache/compressors/repeated_qwords.cc | 1 - src/mem/cache/compressors/repeated_qwords.hh | 1 - src/mem/cache/compressors/zero.cc | 1 - src/mem/cache/compressors/zero.hh | 1 - 27 files changed, 27 deletions(-) diff --git a/src/mem/cache/compressors/base.cc b/src/mem/cache/compressors/base.cc index cafd691bbc..df3020dbf8 100644 --- a/src/mem/cache/compressors/base.cc +++ b/src/mem/cache/compressors/base.cc @@ -48,7 +48,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/base.hh b/src/mem/cache/compressors/base.hh index 4945176cd7..110c6a44f8 100644 --- a/src/mem/cache/compressors/base.hh +++ 
b/src/mem/cache/compressors/base.hh @@ -50,7 +50,6 @@ class BaseCache; class CacheBlk; struct BaseCacheCompressorParams; -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/base_delta.cc b/src/mem/cache/compressors/base_delta.cc index 9b2e67c023..308dabf2b2 100644 --- a/src/mem/cache/compressors/base_delta.cc +++ b/src/mem/cache/compressors/base_delta.cc @@ -42,7 +42,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/base_delta.hh b/src/mem/cache/compressors/base_delta.hh index 81f2c4b546..a0e666886c 100644 --- a/src/mem/cache/compressors/base_delta.hh +++ b/src/mem/cache/compressors/base_delta.hh @@ -52,7 +52,6 @@ struct Base32Delta8Params; struct Base32Delta16Params; struct Base16Delta8Params; -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/base_delta_impl.hh b/src/mem/cache/compressors/base_delta_impl.hh index c4a841de36..c43283c814 100644 --- a/src/mem/cache/compressors/base_delta_impl.hh +++ b/src/mem/cache/compressors/base_delta_impl.hh @@ -40,7 +40,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/base_dictionary_compressor.cc b/src/mem/cache/compressors/base_dictionary_compressor.cc index 6a1ed925f4..d289db1872 100644 --- a/src/mem/cache/compressors/base_dictionary_compressor.cc +++ b/src/mem/cache/compressors/base_dictionary_compressor.cc @@ -37,7 +37,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/cpack.cc b/src/mem/cache/compressors/cpack.cc index 64376b9237..44f47bbf4c 100644 --- a/src/mem/cache/compressors/cpack.cc +++ b/src/mem/cache/compressors/cpack.cc @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression 
{ diff --git a/src/mem/cache/compressors/cpack.hh b/src/mem/cache/compressors/cpack.hh index 51f5ce17bc..d1005d1dc8 100644 --- a/src/mem/cache/compressors/cpack.hh +++ b/src/mem/cache/compressors/cpack.hh @@ -46,7 +46,6 @@ namespace gem5 struct CPackParams; -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/dictionary_compressor.hh b/src/mem/cache/compressors/dictionary_compressor.hh index c283280980..6efdb73e20 100644 --- a/src/mem/cache/compressors/dictionary_compressor.hh +++ b/src/mem/cache/compressors/dictionary_compressor.hh @@ -61,7 +61,6 @@ namespace gem5 struct BaseDictionaryCompressorParams; -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/dictionary_compressor_impl.hh b/src/mem/cache/compressors/dictionary_compressor_impl.hh index 9eb265b1c6..6fef9482d3 100644 --- a/src/mem/cache/compressors/dictionary_compressor_impl.hh +++ b/src/mem/cache/compressors/dictionary_compressor_impl.hh @@ -43,7 +43,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/encoders/base.hh b/src/mem/cache/compressors/encoders/base.hh index 92971afe74..c5f22977e6 100644 --- a/src/mem/cache/compressors/encoders/base.hh +++ b/src/mem/cache/compressors/encoders/base.hh @@ -36,7 +36,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { GEM5_DEPRECATED_NAMESPACE(Encoder, encoder); diff --git a/src/mem/cache/compressors/encoders/huffman.cc b/src/mem/cache/compressors/encoders/huffman.cc index 7a47aa93e8..a7f24cff94 100644 --- a/src/mem/cache/compressors/encoders/huffman.cc +++ b/src/mem/cache/compressors/encoders/huffman.cc @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { GEM5_DEPRECATED_NAMESPACE(Encoder, encoder); diff --git 
a/src/mem/cache/compressors/encoders/huffman.hh b/src/mem/cache/compressors/encoders/huffman.hh index 3f29f2c264..2ea53641da 100644 --- a/src/mem/cache/compressors/encoders/huffman.hh +++ b/src/mem/cache/compressors/encoders/huffman.hh @@ -42,7 +42,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { GEM5_DEPRECATED_NAMESPACE(Encoder, encoder); diff --git a/src/mem/cache/compressors/fpc.cc b/src/mem/cache/compressors/fpc.cc index 80713552e2..f910eb1494 100644 --- a/src/mem/cache/compressors/fpc.cc +++ b/src/mem/cache/compressors/fpc.cc @@ -34,7 +34,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/fpc.hh b/src/mem/cache/compressors/fpc.hh index 9dd40fbd2a..629b3f1711 100644 --- a/src/mem/cache/compressors/fpc.hh +++ b/src/mem/cache/compressors/fpc.hh @@ -51,7 +51,6 @@ namespace gem5 struct FPCParams; -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/fpcd.cc b/src/mem/cache/compressors/fpcd.cc index 480d34f445..b0ea55c892 100644 --- a/src/mem/cache/compressors/fpcd.cc +++ b/src/mem/cache/compressors/fpcd.cc @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/fpcd.hh b/src/mem/cache/compressors/fpcd.hh index d1ee015957..4df5036037 100644 --- a/src/mem/cache/compressors/fpcd.hh +++ b/src/mem/cache/compressors/fpcd.hh @@ -52,7 +52,6 @@ namespace gem5 struct FPCDParams; -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/frequent_values.cc b/src/mem/cache/compressors/frequent_values.cc index f9f73a9003..b5eca3b096 100644 --- a/src/mem/cache/compressors/frequent_values.cc +++ b/src/mem/cache/compressors/frequent_values.cc @@ -42,7 +42,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); 
namespace compression { diff --git a/src/mem/cache/compressors/frequent_values.hh b/src/mem/cache/compressors/frequent_values.hh index c2874e9f43..e7eac2644f 100644 --- a/src/mem/cache/compressors/frequent_values.hh +++ b/src/mem/cache/compressors/frequent_values.hh @@ -48,7 +48,6 @@ namespace gem5 struct FrequentValuesCompressorParams; -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/multi.cc b/src/mem/cache/compressors/multi.cc index cbc307accb..d86ea2c168 100644 --- a/src/mem/cache/compressors/multi.cc +++ b/src/mem/cache/compressors/multi.cc @@ -45,7 +45,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/multi.hh b/src/mem/cache/compressors/multi.hh index 2cdf78fc97..bb9bd57a0d 100644 --- a/src/mem/cache/compressors/multi.hh +++ b/src/mem/cache/compressors/multi.hh @@ -46,7 +46,6 @@ namespace gem5 struct MultiCompressorParams; -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/perfect.cc b/src/mem/cache/compressors/perfect.cc index e271fa0556..76c37f8a47 100644 --- a/src/mem/cache/compressors/perfect.cc +++ b/src/mem/cache/compressors/perfect.cc @@ -41,7 +41,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/perfect.hh b/src/mem/cache/compressors/perfect.hh index 0d91c50ed2..eaa43ca86a 100644 --- a/src/mem/cache/compressors/perfect.hh +++ b/src/mem/cache/compressors/perfect.hh @@ -46,7 +46,6 @@ namespace gem5 struct PerfectCompressorParams; -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/repeated_qwords.cc b/src/mem/cache/compressors/repeated_qwords.cc index 8d5c32da86..01e83961e3 100644 --- a/src/mem/cache/compressors/repeated_qwords.cc +++ b/src/mem/cache/compressors/repeated_qwords.cc @@ 
-41,7 +41,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/repeated_qwords.hh b/src/mem/cache/compressors/repeated_qwords.hh index 3e900a1655..25deb1ff58 100644 --- a/src/mem/cache/compressors/repeated_qwords.hh +++ b/src/mem/cache/compressors/repeated_qwords.hh @@ -46,7 +46,6 @@ namespace gem5 struct RepeatedQwordsCompressorParams; -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/zero.cc b/src/mem/cache/compressors/zero.cc index 42a3c7c613..3dca1ecfa0 100644 --- a/src/mem/cache/compressors/zero.cc +++ b/src/mem/cache/compressors/zero.cc @@ -41,7 +41,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { diff --git a/src/mem/cache/compressors/zero.hh b/src/mem/cache/compressors/zero.hh index 6e8ce34b2a..5ab994f6ee 100644 --- a/src/mem/cache/compressors/zero.hh +++ b/src/mem/cache/compressors/zero.hh @@ -46,7 +46,6 @@ namespace gem5 struct ZeroCompressorParams; -GEM5_DEPRECATED_NAMESPACE(Compressor, compression); namespace compression { From 82aa4c835846d9c95c75cc8d06ab82a4b6cc7caa Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 10:01:34 -0300 Subject: [PATCH 121/492] mem-cache: Remove the Encoder namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: Iabe3b61eb2409a10c582ab1f1c26abc649c1646a Signed-off-by: Daniel R. 
Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67355 Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/mem/cache/compressors/encoders/base.hh | 1 - src/mem/cache/compressors/encoders/huffman.cc | 1 - src/mem/cache/compressors/encoders/huffman.hh | 1 - 3 files changed, 3 deletions(-) diff --git a/src/mem/cache/compressors/encoders/base.hh b/src/mem/cache/compressors/encoders/base.hh index c5f22977e6..ddc8c67567 100644 --- a/src/mem/cache/compressors/encoders/base.hh +++ b/src/mem/cache/compressors/encoders/base.hh @@ -38,7 +38,6 @@ namespace gem5 namespace compression { -GEM5_DEPRECATED_NAMESPACE(Encoder, encoder); namespace encoder { diff --git a/src/mem/cache/compressors/encoders/huffman.cc b/src/mem/cache/compressors/encoders/huffman.cc index a7f24cff94..5be3bceaef 100644 --- a/src/mem/cache/compressors/encoders/huffman.cc +++ b/src/mem/cache/compressors/encoders/huffman.cc @@ -37,7 +37,6 @@ namespace gem5 namespace compression { -GEM5_DEPRECATED_NAMESPACE(Encoder, encoder); namespace encoder { diff --git a/src/mem/cache/compressors/encoders/huffman.hh b/src/mem/cache/compressors/encoders/huffman.hh index 2ea53641da..761485486e 100644 --- a/src/mem/cache/compressors/encoders/huffman.hh +++ b/src/mem/cache/compressors/encoders/huffman.hh @@ -44,7 +44,6 @@ namespace gem5 namespace compression { -GEM5_DEPRECATED_NAMESPACE(Encoder, encoder); namespace encoder { From 813c27c97a6ef0a283da32fa5b3322b4a6e9f57a Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 09:59:35 -0300 Subject: [PATCH 122/492] mem: Remove the QoS namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: I2fa66e5fc77f19beaac3251602617704dadaec99 Signed-off-by: Daniel R. 
Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67356 Tested-by: kokoro Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power --- src/mem/qos/mem_ctrl.cc | 1 - src/mem/qos/mem_ctrl.hh | 1 - src/mem/qos/mem_sink.cc | 1 - src/mem/qos/mem_sink.hh | 1 - src/mem/qos/policy.cc | 1 - src/mem/qos/policy.hh | 1 - src/mem/qos/policy_fixed_prio.cc | 1 - src/mem/qos/policy_fixed_prio.hh | 1 - src/mem/qos/policy_pf.cc | 1 - src/mem/qos/policy_pf.hh | 1 - src/mem/qos/q_policy.cc | 1 - src/mem/qos/q_policy.hh | 1 - src/mem/qos/turnaround_policy.hh | 1 - src/mem/qos/turnaround_policy_ideal.cc | 1 - src/mem/qos/turnaround_policy_ideal.hh | 1 - 15 files changed, 15 deletions(-) diff --git a/src/mem/qos/mem_ctrl.cc b/src/mem/qos/mem_ctrl.cc index 5bb031c9ed..9bf13280da 100644 --- a/src/mem/qos/mem_ctrl.cc +++ b/src/mem/qos/mem_ctrl.cc @@ -48,7 +48,6 @@ namespace gem5 namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/mem_ctrl.hh b/src/mem/qos/mem_ctrl.hh index 11e787d484..359e2858be 100644 --- a/src/mem/qos/mem_ctrl.hh +++ b/src/mem/qos/mem_ctrl.hh @@ -64,7 +64,6 @@ namespace gem5 namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc index 2dec5d574a..3ffe7f4d61 100644 --- a/src/mem/qos/mem_sink.cc +++ b/src/mem/qos/mem_sink.cc @@ -50,7 +50,6 @@ namespace gem5 namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/mem_sink.hh b/src/mem/qos/mem_sink.hh index a2e975a483..d2310c65fe 100644 --- a/src/mem/qos/mem_sink.hh +++ b/src/mem/qos/mem_sink.hh @@ -59,7 +59,6 @@ struct QoSMemSinkInterfaceParams; namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/policy.cc b/src/mem/qos/policy.cc index 6d41e7d452..5ca7eae6b6 100644 --- a/src/mem/qos/policy.cc +++ b/src/mem/qos/policy.cc @@ -45,7 +45,6 @@ namespace gem5 namespace memory { 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/policy.hh b/src/mem/qos/policy.hh index a7e7666a86..c5bd2be91c 100644 --- a/src/mem/qos/policy.hh +++ b/src/mem/qos/policy.hh @@ -57,7 +57,6 @@ struct QoSPolicyParams; namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/policy_fixed_prio.cc b/src/mem/qos/policy_fixed_prio.cc index 140817e55f..f64aae9aaf 100644 --- a/src/mem/qos/policy_fixed_prio.cc +++ b/src/mem/qos/policy_fixed_prio.cc @@ -51,7 +51,6 @@ namespace gem5 namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/policy_fixed_prio.hh b/src/mem/qos/policy_fixed_prio.hh index 77e7a2515e..18ff6ac8d9 100644 --- a/src/mem/qos/policy_fixed_prio.hh +++ b/src/mem/qos/policy_fixed_prio.hh @@ -52,7 +52,6 @@ struct QoSFixedPriorityPolicyParams; namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/policy_pf.cc b/src/mem/qos/policy_pf.cc index ae15045857..adbcdb448c 100644 --- a/src/mem/qos/policy_pf.cc +++ b/src/mem/qos/policy_pf.cc @@ -48,7 +48,6 @@ namespace gem5 namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/policy_pf.hh b/src/mem/qos/policy_pf.hh index acc2a4a6a6..4c215e54c6 100644 --- a/src/mem/qos/policy_pf.hh +++ b/src/mem/qos/policy_pf.hh @@ -52,7 +52,6 @@ struct QoSPropFairPolicyParams; namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/q_policy.cc b/src/mem/qos/q_policy.cc index de2e31660e..a6d13feb7e 100644 --- a/src/mem/qos/q_policy.cc +++ b/src/mem/qos/q_policy.cc @@ -52,7 +52,6 @@ namespace gem5 namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/q_policy.hh b/src/mem/qos/q_policy.hh index 7af52b6d34..fc9200d0af 100644 --- a/src/mem/qos/q_policy.hh +++ b/src/mem/qos/q_policy.hh @@ -53,7 +53,6 @@ namespace gem5 namespace memory { 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/turnaround_policy.hh b/src/mem/qos/turnaround_policy.hh index 2d5696f60d..9bbb446a12 100644 --- a/src/mem/qos/turnaround_policy.hh +++ b/src/mem/qos/turnaround_policy.hh @@ -49,7 +49,6 @@ namespace gem5 namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/turnaround_policy_ideal.cc b/src/mem/qos/turnaround_policy_ideal.cc index c67e40b2c6..8d3d7d0b11 100644 --- a/src/mem/qos/turnaround_policy_ideal.cc +++ b/src/mem/qos/turnaround_policy_ideal.cc @@ -48,7 +48,6 @@ namespace gem5 namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { diff --git a/src/mem/qos/turnaround_policy_ideal.hh b/src/mem/qos/turnaround_policy_ideal.hh index 0a75f79bf8..de416c475c 100644 --- a/src/mem/qos/turnaround_policy_ideal.hh +++ b/src/mem/qos/turnaround_policy_ideal.hh @@ -47,7 +47,6 @@ namespace gem5 namespace memory { -GEM5_DEPRECATED_NAMESPACE(QoS, qos); namespace qos { From e881f2603cc31a3a7d628ccb7890d0b5e3d5a3a5 Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 10:14:33 -0300 Subject: [PATCH 123/492] mem: Remove the ContextSwitchTaskId namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: Iab4bb6ac6e8d603fb508330691796ccdac4b9cb6 Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67357 Tested-by: kokoro Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power --- src/mem/request.hh | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mem/request.hh b/src/mem/request.hh index 6a0cbc21d4..be91c71cc0 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -74,7 +74,6 @@ namespace gem5 * doesn't cause a problem with stats and is large enough to realistic * benchmarks (Linux/Android boot, BBench, etc.) 
*/ -GEM5_DEPRECATED_NAMESPACE(ContextSwitchTaskId, context_switch_task_id); namespace context_switch_task_id { enum TaskId From 65317b6fc93f480f588b55d3d1979be572125eb3 Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 09:50:13 -0300 Subject: [PATCH 124/492] base: Remove the BloomFilter namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: Ib919285c6270eb53bd29ab534f3f9b5612417bb2 Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67358 Tested-by: kokoro Maintainer: Jason Lowe-Power Reviewed-by: Richard Cooper --- src/base/filters/base.hh | 1 - src/base/filters/block_bloom_filter.cc | 1 - src/base/filters/block_bloom_filter.hh | 1 - src/base/filters/bulk_bloom_filter.cc | 1 - src/base/filters/bulk_bloom_filter.hh | 1 - src/base/filters/h3_bloom_filter.cc | 1 - src/base/filters/h3_bloom_filter.hh | 1 - src/base/filters/multi_bit_sel_bloom_filter.cc | 1 - src/base/filters/multi_bit_sel_bloom_filter.hh | 1 - src/base/filters/multi_bloom_filter.cc | 1 - src/base/filters/multi_bloom_filter.hh | 1 - src/base/filters/perfect_bloom_filter.cc | 1 - src/base/filters/perfect_bloom_filter.hh | 1 - 13 files changed, 13 deletions(-) diff --git a/src/base/filters/base.hh b/src/base/filters/base.hh index f2b9fce7c9..858e265dc0 100644 --- a/src/base/filters/base.hh +++ b/src/base/filters/base.hh @@ -42,7 +42,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { diff --git a/src/base/filters/block_bloom_filter.cc b/src/base/filters/block_bloom_filter.cc index e1ae116783..7a3c170057 100644 --- a/src/base/filters/block_bloom_filter.cc +++ b/src/base/filters/block_bloom_filter.cc @@ -36,7 +36,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { diff --git a/src/base/filters/block_bloom_filter.hh b/src/base/filters/block_bloom_filter.hh index 0375d30a1f..f7040064a1 100644 
--- a/src/base/filters/block_bloom_filter.hh +++ b/src/base/filters/block_bloom_filter.hh @@ -39,7 +39,6 @@ namespace gem5 struct BloomFilterBlockParams; -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { diff --git a/src/base/filters/bulk_bloom_filter.cc b/src/base/filters/bulk_bloom_filter.cc index 3a2ac58cc7..cf28bf90f3 100644 --- a/src/base/filters/bulk_bloom_filter.cc +++ b/src/base/filters/bulk_bloom_filter.cc @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { diff --git a/src/base/filters/bulk_bloom_filter.hh b/src/base/filters/bulk_bloom_filter.hh index 985fcb3f7a..6c474760ae 100644 --- a/src/base/filters/bulk_bloom_filter.hh +++ b/src/base/filters/bulk_bloom_filter.hh @@ -37,7 +37,6 @@ namespace gem5 struct BloomFilterBulkParams; -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { diff --git a/src/base/filters/h3_bloom_filter.cc b/src/base/filters/h3_bloom_filter.cc index e1aeba7e73..9e973d88fa 100644 --- a/src/base/filters/h3_bloom_filter.cc +++ b/src/base/filters/h3_bloom_filter.cc @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { diff --git a/src/base/filters/h3_bloom_filter.hh b/src/base/filters/h3_bloom_filter.hh index a60c21217a..fc6ba657e2 100644 --- a/src/base/filters/h3_bloom_filter.hh +++ b/src/base/filters/h3_bloom_filter.hh @@ -37,7 +37,6 @@ namespace gem5 struct BloomFilterH3Params; -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { diff --git a/src/base/filters/multi_bit_sel_bloom_filter.cc b/src/base/filters/multi_bit_sel_bloom_filter.cc index f12d1f766d..e6f6c14576 100644 --- a/src/base/filters/multi_bit_sel_bloom_filter.cc +++ b/src/base/filters/multi_bit_sel_bloom_filter.cc @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { diff --git 
a/src/base/filters/multi_bit_sel_bloom_filter.hh b/src/base/filters/multi_bit_sel_bloom_filter.hh index 8c5b34cdd5..a746b1d0ad 100644 --- a/src/base/filters/multi_bit_sel_bloom_filter.hh +++ b/src/base/filters/multi_bit_sel_bloom_filter.hh @@ -37,7 +37,6 @@ namespace gem5 struct BloomFilterMultiBitSelParams; -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { diff --git a/src/base/filters/multi_bloom_filter.cc b/src/base/filters/multi_bloom_filter.cc index 401d84401d..f6b4892800 100644 --- a/src/base/filters/multi_bloom_filter.cc +++ b/src/base/filters/multi_bloom_filter.cc @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { diff --git a/src/base/filters/multi_bloom_filter.hh b/src/base/filters/multi_bloom_filter.hh index ec9838a7b3..9445b81d5c 100644 --- a/src/base/filters/multi_bloom_filter.hh +++ b/src/base/filters/multi_bloom_filter.hh @@ -39,7 +39,6 @@ namespace gem5 struct BloomFilterMultiParams; -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { diff --git a/src/base/filters/perfect_bloom_filter.cc b/src/base/filters/perfect_bloom_filter.cc index 7583a1a196..f6f9d8b106 100644 --- a/src/base/filters/perfect_bloom_filter.cc +++ b/src/base/filters/perfect_bloom_filter.cc @@ -34,7 +34,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { diff --git a/src/base/filters/perfect_bloom_filter.hh b/src/base/filters/perfect_bloom_filter.hh index 65ef01544c..2bcecb8987 100644 --- a/src/base/filters/perfect_bloom_filter.hh +++ b/src/base/filters/perfect_bloom_filter.hh @@ -38,7 +38,6 @@ namespace gem5 struct BloomFilterPerfectParams; -GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter); namespace bloom_filter { From 4f480fc6fc5d639ca16cc7f4a9bdacc597251b02 Mon Sep 17 00:00:00 2001 From: "Daniel R. 
Carvalho" Date: Sat, 14 Jan 2023 09:51:32 -0300 Subject: [PATCH 125/492] base: Remove the Stats namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: I80f25af68e03fff3df8316cb4d1d2669687d0fe4 Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67359 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/base/statistics.cc | 1 - src/base/statistics.hh | 1 - src/base/stats/group.cc | 1 - src/base/stats/group.hh | 1 - src/base/stats/group.test.cc | 6 +++--- src/base/stats/hdf5.cc | 1 - src/base/stats/hdf5.hh | 1 - src/base/stats/info.cc | 1 - src/base/stats/info.hh | 1 - src/base/stats/output.hh | 1 - src/base/stats/storage.cc | 1 - src/base/stats/storage.hh | 1 - src/base/stats/text.cc | 1 - src/base/stats/text.hh | 1 - src/base/stats/types.hh | 1 - src/base/stats/units.hh | 1 - src/python/pybind11/stats.cc | 1 - src/sim/power/mathexpr_powermodel.hh | 1 - src/sim/stat_control.cc | 1 - src/sim/stat_control.hh | 1 - src/sim/stat_register.cc | 1 - src/sim/stat_register.hh | 1 - 22 files changed, 3 insertions(+), 24 deletions(-) diff --git a/src/base/statistics.cc b/src/base/statistics.cc index c3801436e7..2fddf1bab6 100644 --- a/src/base/statistics.cc +++ b/src/base/statistics.cc @@ -53,7 +53,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/statistics.hh b/src/base/statistics.hh index 24cbf714f5..8156be5a79 100644 --- a/src/base/statistics.hh +++ b/src/base/statistics.hh @@ -91,7 +91,6 @@ namespace gem5 { /* A namespace for all of the Statistics */ -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/stats/group.cc b/src/base/stats/group.cc index d5626e6bb1..93e7183f0e 100644 --- a/src/base/stats/group.cc +++ b/src/base/stats/group.cc @@ -47,7 +47,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff 
--git a/src/base/stats/group.hh b/src/base/stats/group.hh index bd1183e4a9..3c11e61138 100644 --- a/src/base/stats/group.hh +++ b/src/base/stats/group.hh @@ -74,7 +74,6 @@ namespace gem5 #define ADD_STAT(n, ...) n(this, #n, __VA_ARGS__) -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/stats/group.test.cc b/src/base/stats/group.test.cc index e2e059830c..5a7cde4928 100644 --- a/src/base/stats/group.test.cc +++ b/src/base/stats/group.test.cc @@ -255,10 +255,10 @@ TEST(StatsGroupTest, ConstructTwoLevelsUnbalancedTree) ASSERT_EQ(node2_2.getStatGroups().size(), 0); } -class DummyInfo : public Stats::Info +class DummyInfo : public statistics::Info { public: - using Stats::Info::Info; + using statistics::Info::Info; int value = 0; @@ -266,7 +266,7 @@ class DummyInfo : public Stats::Info void prepare() override {} void reset() override { value = 0; } bool zero() const override { return false; } - void visit(Stats::Output &visitor) override {} + void visit(statistics::Output &visitor) override {} }; /** Test adding stats to a group. 
*/ diff --git a/src/base/stats/hdf5.cc b/src/base/stats/hdf5.cc index 03574b2e4d..be548bf806 100644 --- a/src/base/stats/hdf5.cc +++ b/src/base/stats/hdf5.cc @@ -59,7 +59,6 @@ bool emptyStrings(const T &labels) } -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/stats/hdf5.hh b/src/base/stats/hdf5.hh index 7fa99991a6..ac21ee8af1 100644 --- a/src/base/stats/hdf5.hh +++ b/src/base/stats/hdf5.hh @@ -53,7 +53,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/stats/info.cc b/src/base/stats/info.cc index c40b55918e..06e7ec977d 100644 --- a/src/base/stats/info.cc +++ b/src/base/stats/info.cc @@ -52,7 +52,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/stats/info.hh b/src/base/stats/info.hh index 9a5e2e77ab..98859cb0d9 100644 --- a/src/base/stats/info.hh +++ b/src/base/stats/info.hh @@ -43,7 +43,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/stats/output.hh b/src/base/stats/output.hh index 39b0804a40..23531e8493 100644 --- a/src/base/stats/output.hh +++ b/src/base/stats/output.hh @@ -49,7 +49,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/stats/storage.cc b/src/base/stats/storage.cc index 6b32dc501a..3b2c091815 100644 --- a/src/base/stats/storage.cc +++ b/src/base/stats/storage.cc @@ -46,7 +46,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/stats/storage.hh b/src/base/stats/storage.hh index cf22e10080..eb1873b934 100644 --- a/src/base/stats/storage.hh +++ b/src/base/stats/storage.hh @@ -42,7 +42,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/stats/text.cc b/src/base/stats/text.cc index db5743ac57..36282a35d7 100644 --- 
a/src/base/stats/text.cc +++ b/src/base/stats/text.cc @@ -67,7 +67,6 @@ constexpr auto Nan = std::numeric_limits::quiet_NaN(); } // anonymous namespace -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/stats/text.hh b/src/base/stats/text.hh index 4bbe3eadfe..7be498d8da 100644 --- a/src/base/stats/text.hh +++ b/src/base/stats/text.hh @@ -53,7 +53,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/stats/types.hh b/src/base/stats/types.hh index 92d594ab77..14f89caff3 100644 --- a/src/base/stats/types.hh +++ b/src/base/stats/types.hh @@ -39,7 +39,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/base/stats/units.hh b/src/base/stats/units.hh index 52e2e57ce8..fe5b23d878 100644 --- a/src/base/stats/units.hh +++ b/src/base/stats/units.hh @@ -75,7 +75,6 @@ namespace gem5 UNIT_UNSPECIFIED, statistics::units::Unspecified::get(), \ "Use statistics::units::Unspecified::get()") -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/python/pybind11/stats.cc b/src/python/pybind11/stats.cc index 2c60b47ee4..266f47e52a 100644 --- a/src/python/pybind11/stats.cc +++ b/src/python/pybind11/stats.cc @@ -83,7 +83,6 @@ cast_stat_info(const statistics::Info *info) #undef TRY_CAST } -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/sim/power/mathexpr_powermodel.hh b/src/sim/power/mathexpr_powermodel.hh index 25338eea43..f05214a16f 100644 --- a/src/sim/power/mathexpr_powermodel.hh +++ b/src/sim/power/mathexpr_powermodel.hh @@ -47,7 +47,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { class Info; diff --git a/src/sim/stat_control.cc b/src/sim/stat_control.cc index c388539551..99c694a384 100644 --- a/src/sim/stat_control.cc +++ b/src/sim/stat_control.cc @@ -57,7 +57,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, 
statistics); namespace statistics { diff --git a/src/sim/stat_control.hh b/src/sim/stat_control.hh index 22d3134370..35d3ea8fcb 100644 --- a/src/sim/stat_control.hh +++ b/src/sim/stat_control.hh @@ -48,7 +48,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/sim/stat_register.cc b/src/sim/stat_register.cc index fb3db1e4b1..5e4bf3908e 100644 --- a/src/sim/stat_register.cc +++ b/src/sim/stat_register.cc @@ -42,7 +42,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { diff --git a/src/sim/stat_register.hh b/src/sim/stat_register.hh index d2504f3f02..e84e8ebdb6 100644 --- a/src/sim/stat_register.hh +++ b/src/sim/stat_register.hh @@ -47,7 +47,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Stats, statistics); namespace statistics { From d2bfb4aeef4dea175482093c42744fbdb8f55f33 Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 09:58:30 -0300 Subject: [PATCH 126/492] base: Remove the Debug namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: I4241501f3683c1daa8554693cba7aa2c022db130 Signed-off-by: Daniel R. 
Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67360 Reviewed-by: Richard Cooper Tested-by: kokoro Maintainer: Jason Lowe-Power --- build_tools/debugflaghh.py | 1 - src/base/debug.cc | 1 - src/base/debug.hh | 1 - 3 files changed, 3 deletions(-) diff --git a/build_tools/debugflaghh.py b/build_tools/debugflaghh.py index 2e861e2790..1a4a379204 100644 --- a/build_tools/debugflaghh.py +++ b/build_tools/debugflaghh.py @@ -82,7 +82,6 @@ code( namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Debug, debug); namespace debug { diff --git a/src/base/debug.cc b/src/base/debug.cc index aa4092afc1..73b52f311f 100644 --- a/src/base/debug.cc +++ b/src/base/debug.cc @@ -52,7 +52,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Debug, debug); namespace debug { diff --git a/src/base/debug.hh b/src/base/debug.hh index f6b03ae2a7..3941e66022 100644 --- a/src/base/debug.hh +++ b/src/base/debug.hh @@ -53,7 +53,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Debug, debug); namespace debug { From 544d53798b9f931c68de98c5f9c7d741eb0a14b1 Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 09:53:34 -0300 Subject: [PATCH 127/492] base: Remove the Units namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: I3d885e656caea0f96dfbdda69713832ff5f79d28 Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67361 Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/base/stats/units.hh | 1 - 1 file changed, 1 deletion(-) diff --git a/src/base/stats/units.hh b/src/base/stats/units.hh index fe5b23d878..1d7d640ddb 100644 --- a/src/base/stats/units.hh +++ b/src/base/stats/units.hh @@ -109,7 +109,6 @@ namespace statistics * - The new unit is significant enough to be not included in Count unit. * (e.g. 
Cycle unit, Tick unit) */ -GEM5_DEPRECATED_NAMESPACE(Units, units); namespace units { From cc3d75ad72e533f6daabcb4722091bf6199e0c48 Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 09:59:03 -0300 Subject: [PATCH 128/492] base: Remove the Loader namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: I87b763fccfcdf720909dfbda9c3fc8f6dea36a61 Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67362 Tested-by: kokoro --- src/arch/mips/process.hh | 1 - src/arch/power/process.hh | 1 - src/arch/riscv/process.hh | 1 - src/base/loader/dtb_file.cc | 1 - src/base/loader/dtb_file.hh | 1 - src/base/loader/elf_object.cc | 1 - src/base/loader/elf_object.hh | 1 - src/base/loader/image_file.hh | 1 - src/base/loader/image_file_data.cc | 1 - src/base/loader/image_file_data.hh | 1 - src/base/loader/memory_image.cc | 1 - src/base/loader/memory_image.hh | 1 - src/base/loader/object_file.cc | 1 - src/base/loader/object_file.hh | 1 - src/base/loader/raw_image.hh | 1 - src/base/loader/symtab.cc | 1 - src/base/loader/symtab.hh | 1 - src/base/loader/symtab.test.cc | 290 ++++++++++++++--------------- src/cpu/profile.hh | 1 - src/cpu/static_inst.hh | 1 - src/sim/process.hh | 1 - 21 files changed, 145 insertions(+), 165 deletions(-) diff --git a/src/arch/mips/process.hh b/src/arch/mips/process.hh index 181dd25497..8b84ec198c 100644 --- a/src/arch/mips/process.hh +++ b/src/arch/mips/process.hh @@ -34,7 +34,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { class ObjectFile; diff --git a/src/arch/power/process.hh b/src/arch/power/process.hh index c8d8a79864..9576c352b9 100644 --- a/src/arch/power/process.hh +++ b/src/arch/power/process.hh @@ -36,7 +36,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { class ObjectFile; diff --git a/src/arch/riscv/process.hh b/src/arch/riscv/process.hh index 
ca0a050349..64b9593965 100644 --- a/src/arch/riscv/process.hh +++ b/src/arch/riscv/process.hh @@ -40,7 +40,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { class ObjectFile; diff --git a/src/base/loader/dtb_file.cc b/src/base/loader/dtb_file.cc index 13e0264e92..f083b3e1fe 100644 --- a/src/base/loader/dtb_file.cc +++ b/src/base/loader/dtb_file.cc @@ -40,7 +40,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/dtb_file.hh b/src/base/loader/dtb_file.hh index c11b19539e..bed7cfc6f6 100644 --- a/src/base/loader/dtb_file.hh +++ b/src/base/loader/dtb_file.hh @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/elf_object.cc b/src/base/loader/elf_object.cc index dc2abb8dfc..4b1467acf0 100644 --- a/src/base/loader/elf_object.cc +++ b/src/base/loader/elf_object.cc @@ -61,7 +61,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/elf_object.hh b/src/base/loader/elf_object.hh index 6159b35a7b..f08449206e 100644 --- a/src/base/loader/elf_object.hh +++ b/src/base/loader/elf_object.hh @@ -51,7 +51,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/image_file.hh b/src/base/loader/image_file.hh index 194c9567d7..f1d39555ec 100644 --- a/src/base/loader/image_file.hh +++ b/src/base/loader/image_file.hh @@ -39,7 +39,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/image_file_data.cc b/src/base/loader/image_file_data.cc index 57fb47fd4c..525d577936 100644 --- a/src/base/loader/image_file_data.cc +++ b/src/base/loader/image_file_data.cc @@ -42,7 +42,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/image_file_data.hh b/src/base/loader/image_file_data.hh index 
d02c499d1e..4d1701d9a3 100644 --- a/src/base/loader/image_file_data.hh +++ b/src/base/loader/image_file_data.hh @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/memory_image.cc b/src/base/loader/memory_image.cc index 5537f28023..a3f378c10b 100644 --- a/src/base/loader/memory_image.cc +++ b/src/base/loader/memory_image.cc @@ -32,7 +32,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/memory_image.hh b/src/base/loader/memory_image.hh index 2c56f4c088..1207e7458d 100644 --- a/src/base/loader/memory_image.hh +++ b/src/base/loader/memory_image.hh @@ -46,7 +46,6 @@ namespace gem5 class PortProxy; -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/object_file.cc b/src/base/loader/object_file.cc index 3aa5915cdb..287f9107a6 100644 --- a/src/base/loader/object_file.cc +++ b/src/base/loader/object_file.cc @@ -48,7 +48,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/object_file.hh b/src/base/loader/object_file.hh index 0415bec62e..f0781165a1 100644 --- a/src/base/loader/object_file.hh +++ b/src/base/loader/object_file.hh @@ -55,7 +55,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/raw_image.hh b/src/base/loader/raw_image.hh index 7321ea40bf..29f4340beb 100644 --- a/src/base/loader/raw_image.hh +++ b/src/base/loader/raw_image.hh @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/symtab.cc b/src/base/loader/symtab.cc index f2f54e937b..941ea101c9 100644 --- a/src/base/loader/symtab.cc +++ b/src/base/loader/symtab.cc @@ -37,7 +37,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/symtab.hh b/src/base/loader/symtab.hh index 
e48e400a4f..2e50523c32 100644 --- a/src/base/loader/symtab.hh +++ b/src/base/loader/symtab.hh @@ -44,7 +44,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { diff --git a/src/base/loader/symtab.test.cc b/src/base/loader/symtab.test.cc index e9edb113be..313055392a 100644 --- a/src/base/loader/symtab.test.cc +++ b/src/base/loader/symtab.test.cc @@ -48,7 +48,7 @@ using namespace gem5; * @return The error string, if any. */ std::string -getSymbolError(const Loader::Symbol& symbol, const Loader::Symbol& expected) +getSymbolError(const loader::Symbol& symbol, const loader::Symbol& expected) { std::stringstream ss; @@ -83,7 +83,7 @@ getSymbolError(const Loader::Symbol& symbol, const Loader::Symbol& expected) */ ::testing::AssertionResult checkSymbol(const char* m_symbol, const char* m_expected, - const Loader::Symbol& symbol, const Loader::Symbol& expected) + const loader::Symbol& symbol, const loader::Symbol& expected) { const std::string error = getSymbolError(symbol, expected); if (!error.empty()) { @@ -101,8 +101,8 @@ checkSymbol(const char* m_symbol, const char* m_expected, * @return A GTest's assertion result, with error message on failure. */ ::testing::AssertionResult -checkTable(const Loader::SymbolTable& symtab, - const std::initializer_list& expected) +checkTable(const loader::SymbolTable& symtab, + const std::initializer_list& expected) { if (expected.size() != (symtab.end() - symtab.begin())) { return ::testing::AssertionFailure() << "the number of symbols in " @@ -126,7 +126,7 @@ checkTable(const Loader::SymbolTable& symtab, /** Test that the constructor creates an empty table. */ TEST(LoaderSymtabTest, EmptyConstruction) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; ASSERT_TRUE(symtab.empty()); ASSERT_TRUE(checkTable(symtab, {})); } @@ -134,9 +134,9 @@ TEST(LoaderSymtabTest, EmptyConstruction) /** Test that the insertion of a symbol with no name fails. 
*/ TEST(LoaderSymtabTest, InsertSymbolNoName) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "", 0x10}; + loader::Symbol symbol = {loader::Symbol::Binding::Local, "", 0x10}; ASSERT_FALSE(symtab.insert(symbol)); ASSERT_TRUE(checkTable(symtab, {})); } @@ -144,9 +144,9 @@ TEST(LoaderSymtabTest, InsertSymbolNoName) /** Test that the insertion of one symbol in an empty table works. */ TEST(LoaderSymtabTest, InsertOneSymbol) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "symbol", 0x10}; + loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10}; ASSERT_TRUE(symtab.insert(symbol)); ASSERT_FALSE(symtab.empty()); @@ -156,12 +156,12 @@ TEST(LoaderSymtabTest, InsertOneSymbol) /** Test that the insertion of a symbol with an existing name fails. */ TEST(LoaderSymtabTest, InsertSymbolExistingName) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; const std::string name = "symbol"; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, name, 0x10}, - {Loader::Symbol::Binding::Local, name, 0x20}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, name, 0x10}, + {loader::Symbol::Binding::Local, name, 0x20}, }; ASSERT_TRUE(symtab.insert(symbols[0])); ASSERT_FALSE(symtab.insert(symbols[1])); @@ -173,12 +173,12 @@ TEST(LoaderSymtabTest, InsertSymbolExistingName) /** Test that the insertion of a symbol with an existing address works. 
*/ TEST(LoaderSymtabTest, InsertSymbolExistingAddress) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; const Addr addr = 0x10; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", addr}, - {Loader::Symbol::Binding::Local, "symbol2", addr}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", addr}, + {loader::Symbol::Binding::Local, "symbol2", addr}, }; ASSERT_TRUE(symtab.insert(symbols[0])); ASSERT_TRUE(symtab.insert(symbols[1])); @@ -190,12 +190,12 @@ TEST(LoaderSymtabTest, InsertSymbolExistingAddress) /** Test that the insertion of one symbol in a non-empty table works. */ TEST(LoaderSymtabTest, InsertMultipleSymbols) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -209,12 +209,12 @@ TEST(LoaderSymtabTest, InsertMultipleSymbols) */ TEST(LoaderSymtabTest, ClearMultiple) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -231,12 +231,12 @@ TEST(LoaderSymtabTest, ClearMultiple) */ TEST(LoaderSymtabTest, Offset) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; 
- Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -249,7 +249,7 @@ TEST(LoaderSymtabTest, Offset) ASSERT_TRUE(checkTable(symtab, {symbols[0], symbols[1], symbols[2]})); // Check that the new table is offset - Loader::Symbol expected_symbols[] = { + loader::Symbol expected_symbols[] = { {symbols[0].binding, symbols[0].name, symbols[0].address + offset}, {symbols[1].binding, symbols[1].name, symbols[1].address + offset}, {symbols[2].binding, symbols[2].name, symbols[2].address + offset}, @@ -264,13 +264,13 @@ TEST(LoaderSymtabTest, Offset) */ TEST(LoaderSymtabTest, Mask) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x1310}, - {Loader::Symbol::Binding::Local, "symbol2", 0x2810}, - {Loader::Symbol::Binding::Local, "symbol3", 0x2920}, - {Loader::Symbol::Binding::Local, "symbol4", 0x3C20}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x1310}, + {loader::Symbol::Binding::Local, "symbol2", 0x2810}, + {loader::Symbol::Binding::Local, "symbol3", 0x2920}, + {loader::Symbol::Binding::Local, "symbol4", 0x3C20}, }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -285,7 +285,7 @@ TEST(LoaderSymtabTest, Mask) symbols[3]})); // Check that the new table is masked - Loader::Symbol expected_symbols[] = { + loader::Symbol expected_symbols[] = { {symbols[0].binding, symbols[0].name, symbols[0].address & mask}, {symbols[1].binding, symbols[1].name, symbols[1].address & mask}, {symbols[2].binding, symbols[2].name, 
symbols[2].address & mask}, @@ -301,13 +301,13 @@ TEST(LoaderSymtabTest, Mask) */ TEST(LoaderSymtabTest, Rename) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, - {Loader::Symbol::Binding::Local, "symbol4", 0x40}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, + {loader::Symbol::Binding::Local, "symbol4", 0x40}, }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -322,7 +322,7 @@ TEST(LoaderSymtabTest, Rename) symbols[3]})); // Check that the new table's symbols have been renamed - Loader::Symbol expected_symbols[] = { + loader::Symbol expected_symbols[] = { {symbols[0].binding, symbols[0].name + "_suffix", symbols[0].address}, {symbols[1].binding, symbols[1].name + "_suffix", symbols[1].address}, {symbols[2].binding, symbols[2].name + "_suffix", symbols[2].address}, @@ -338,13 +338,13 @@ TEST(LoaderSymtabTest, Rename) */ TEST(LoaderSymtabTest, RenameNonUnique) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, - {Loader::Symbol::Binding::Local, "symbol4", 0x40}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, + {loader::Symbol::Binding::Local, "symbol4", 0x40}, }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -365,7 +365,7 @@ TEST(LoaderSymtabTest, RenameNonUnique) // Check that the new table's symbols have been renamed, 
yet it does not // contain the symbols with duplicated names - Loader::Symbol expected_symbols[] = { + loader::Symbol expected_symbols[] = { {symbols[0].binding, "NonUniqueName", symbols[0].address}, {symbols[1].binding, symbols[1].name, symbols[1].address}, {symbols[3].binding, symbols[3].name, symbols[3].address}, @@ -380,14 +380,14 @@ TEST(LoaderSymtabTest, RenameNonUnique) */ TEST(LoaderSymtabTest, Globals) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Global, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, - {Loader::Symbol::Binding::Weak, "symbol4", 0x40}, - {Loader::Symbol::Binding::Weak, "symbol5", 0x50} + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Global, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, + {loader::Symbol::Binding::Weak, "symbol4", 0x40}, + {loader::Symbol::Binding::Weak, "symbol5", 0x50} }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -411,14 +411,14 @@ TEST(LoaderSymtabTest, Globals) */ TEST(LoaderSymtabTest, Locals) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Global, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, - {Loader::Symbol::Binding::Weak, "symbol4", 0x40}, - {Loader::Symbol::Binding::Weak, "symbol5", 0x50} + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Global, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, + {loader::Symbol::Binding::Weak, "symbol4", 0x40}, + {loader::Symbol::Binding::Weak, "symbol5", 0x50} }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -442,14 +442,14 @@ 
TEST(LoaderSymtabTest, Locals) */ TEST(LoaderSymtabTest, Weaks) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Global, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, - {Loader::Symbol::Binding::Weak, "symbol4", 0x40}, - {Loader::Symbol::Binding::Weak, "symbol5", 0x50} + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Global, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, + {loader::Symbol::Binding::Weak, "symbol4", 0x40}, + {loader::Symbol::Binding::Weak, "symbol5", 0x50} }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -470,9 +470,9 @@ TEST(LoaderSymtabTest, Weaks) /** Test searching for a non-existent address. */ TEST(LoaderSymtabTest, FindNonExistentAddress) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "symbol", 0x10}; + loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10}; EXPECT_TRUE(symtab.insert(symbol)); ASSERT_EQ(symtab.find(0x0), symtab.end()); @@ -481,12 +481,12 @@ TEST(LoaderSymtabTest, FindNonExistentAddress) /** Test searching for a unique address. 
*/ TEST(LoaderSymtabTest, FindUniqueAddress) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -502,13 +502,13 @@ TEST(LoaderSymtabTest, FindUniqueAddress) */ TEST(LoaderSymtabTest, FindNonUniqueAddress) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; const Addr addr = 0x20; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", addr}, - {Loader::Symbol::Binding::Local, "symbol3", addr}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, "symbol2", addr}, + {loader::Symbol::Binding::Local, "symbol3", addr}, }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -522,9 +522,9 @@ TEST(LoaderSymtabTest, FindNonUniqueAddress) /** Test searching for a non-existent name. */ TEST(LoaderSymtabTest, FindNonExistentName) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "symbol", 0x10}; + loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10}; EXPECT_TRUE(symtab.insert(symbol)); const auto it = symtab.find("symbol2"); @@ -534,12 +534,12 @@ TEST(LoaderSymtabTest, FindNonExistentName) /** Test searching for an existing name. 
*/ TEST(LoaderSymtabTest, FindExistingName) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -553,11 +553,11 @@ TEST(LoaderSymtabTest, FindExistingName) /** Test searching for an existent address using findNearest. */ TEST(LoaderSymtabTest, FindNearestExact) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, "symbol2", 0x20}, }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -573,9 +573,9 @@ TEST(LoaderSymtabTest, FindNearestExact) */ TEST(LoaderSymtabTest, FindNearestRound) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "symbol", 0x10}; + loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10}; EXPECT_TRUE(symtab.insert(symbol)); const auto it = symtab.findNearest(symbol.address + 0x1); @@ -590,11 +590,11 @@ TEST(LoaderSymtabTest, FindNearestRound) */ TEST(LoaderSymtabTest, FindNearestRoundWithNext) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, 
"symbol2", 0x20}, }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -613,9 +613,9 @@ TEST(LoaderSymtabTest, FindNearestRoundWithNext) */ TEST(LoaderSymtabTest, FindNearestRoundWithNextNonExistent) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "symbol", 0x10}; + loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10}; EXPECT_TRUE(symtab.insert(symbol)); Addr next_addr; @@ -631,9 +631,9 @@ TEST(LoaderSymtabTest, FindNearestRoundWithNextNonExistent) */ TEST(LoaderSymtabTest, FindNearestNonExistent) { - Loader::SymbolTable symtab; + loader::SymbolTable symtab; - Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "symbol", 0x10}; + loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10}; EXPECT_TRUE(symtab.insert(symbol)); const auto it = symtab.findNearest(symbol.address - 0x1); @@ -647,23 +647,23 @@ TEST(LoaderSymtabTest, FindNearestNonExistent) TEST(LoaderSymtabTest, InsertTableConflicting) { const std::string name = "symbol"; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, name, 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, - {Loader::Symbol::Binding::Local, "symbol4", 0x40}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, name, 0x10}, + {loader::Symbol::Binding::Local, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, + {loader::Symbol::Binding::Local, "symbol4", 0x40}, // Introduce name conflict - {Loader::Symbol::Binding::Local, name, 0x50}, + {loader::Symbol::Binding::Local, name, 0x50}, }; // Populate table 1 - Loader::SymbolTable symtab; + loader::SymbolTable symtab; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); EXPECT_TRUE(symtab.insert(symbols[2])); // Populate table 2 - Loader::SymbolTable symtab2; + loader::SymbolTable symtab2; 
EXPECT_TRUE(symtab2.insert(symbols[3])); EXPECT_TRUE(symtab2.insert(symbols[4])); @@ -681,22 +681,22 @@ TEST(LoaderSymtabTest, InsertTableConflicting) */ TEST(LoaderSymtabTest, InsertTable) { - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, - {Loader::Symbol::Binding::Local, "symbol4", 0x40}, - {Loader::Symbol::Binding::Local, "symbol5", 0x50}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, + {loader::Symbol::Binding::Local, "symbol4", 0x40}, + {loader::Symbol::Binding::Local, "symbol5", 0x50}, }; // Populate table 1 - Loader::SymbolTable symtab; + loader::SymbolTable symtab; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); EXPECT_TRUE(symtab.insert(symbols[2])); // Populate table 2 - Loader::SymbolTable symtab2; + loader::SymbolTable symtab2; EXPECT_TRUE(symtab2.insert(symbols[3])); EXPECT_TRUE(symtab2.insert(symbols[4])); @@ -717,11 +717,11 @@ using LoaderSymtabSerializationFixture = SerializationFixture; TEST_F(LoaderSymtabSerializationFixture, Serialization) { // Populate the table - Loader::SymbolTable symtab; - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, + loader::SymbolTable symtab; + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, }; EXPECT_TRUE(symtab.insert(symbols[0])); EXPECT_TRUE(symtab.insert(symbols[1])); @@ -742,17 +742,17 @@ TEST_F(LoaderSymtabSerializationFixture, Serialization) /** Test unserialization. 
*/ TEST_F(LoaderSymtabSerializationFixture, Unserialization) { - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Local, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Local, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, }; simulateSerialization("\n[Section1]\ntest.size=3\n" "test.addr_0=16\ntest.symbol_0=symbol\ntest.binding_0=1\n" "test.addr_1=32\ntest.symbol_1=symbol2\ntest.binding_1=1\n" "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n"); - Loader::SymbolTable unserialized_symtab; + loader::SymbolTable unserialized_symtab; CheckpointIn cp(getDirName()); Serializable::ScopedCheckpointSection scs(cp, "Section1"); unserialized_symtab.unserialize("test", cp); @@ -770,17 +770,17 @@ TEST_F(LoaderSymtabSerializationFixture, Unserialization) */ TEST_F(LoaderSymtabSerializationFixture, UnserializationMissingBinding) { - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Global, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Global, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, }; simulateSerialization("\n[Section1]\ntest.size=3\n" "test.addr_0=16\ntest.symbol_0=symbol\ntest.binding_0=1\n" "test.addr_1=32\ntest.symbol_1=symbol2\n" "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n"); - Loader::SymbolTable unserialized_symtab; + loader::SymbolTable unserialized_symtab; CheckpointIn cp(getDirName()); Serializable::ScopedCheckpointSection scs(cp, "Section1"); @@ -800,22 +800,22 @@ TEST_F(LoaderSymtabSerializationFixture, UnserializationMissingBinding) TEST_F(LoaderSymtabSerializationFixture, 
UnserializationMissingBindingChangeDefault) { - Loader::Symbol symbols[] = { - {Loader::Symbol::Binding::Local, "symbol", 0x10}, - {Loader::Symbol::Binding::Weak, "symbol2", 0x20}, - {Loader::Symbol::Binding::Local, "symbol3", 0x30}, + loader::Symbol symbols[] = { + {loader::Symbol::Binding::Local, "symbol", 0x10}, + {loader::Symbol::Binding::Weak, "symbol2", 0x20}, + {loader::Symbol::Binding::Local, "symbol3", 0x30}, }; simulateSerialization("\n[Section1]\ntest.size=3\n" "test.addr_0=16\ntest.symbol_0=symbol\ntest.binding_0=1\n" "test.addr_1=32\ntest.symbol_1=symbol2\n" "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n"); - Loader::SymbolTable unserialized_symtab; + loader::SymbolTable unserialized_symtab; CheckpointIn cp(getDirName()); Serializable::ScopedCheckpointSection scs(cp, "Section1"); unserialized_symtab.unserialize("test", cp, - Loader::Symbol::Binding::Weak); + loader::Symbol::Binding::Weak); // Make sure that the symbols in symtab are present in the // unserialized table diff --git a/src/cpu/profile.hh b/src/cpu/profile.hh index a5e16d6ac1..68283f5b06 100644 --- a/src/cpu/profile.hh +++ b/src/cpu/profile.hh @@ -43,7 +43,6 @@ namespace gem5 class ThreadContext; class FunctionProfile; -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { class SymbolTable; diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index 21ce2aaf8b..3ab78345bc 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -66,7 +66,6 @@ class Packet; class ExecContext; class ThreadContext; -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { class SymbolTable; diff --git a/src/sim/process.hh b/src/sim/process.hh index cece212d14..d6d30cebc8 100644 --- a/src/sim/process.hh +++ b/src/sim/process.hh @@ -49,7 +49,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Loader, loader); namespace loader { class ObjectFile; From c1839aad77a4b8e128864b860d34329a918479ea Mon Sep 17 00:00:00 2001 From: "Daniel R. 
Carvalho" Date: Sat, 14 Jan 2023 09:52:45 -0300 Subject: [PATCH 129/492] fastmodel: Remove the FastModel namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: Ic0a42f7349ccf15f8c1dd276a647e7cb2a56c1cb Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67363 Reviewed-by: Richard Cooper Tested-by: kokoro Maintainer: Jason Lowe-Power --- src/arch/arm/fastmodel/CortexA76/cortex_a76.cc | 1 - src/arch/arm/fastmodel/CortexA76/cortex_a76.hh | 1 - src/arch/arm/fastmodel/CortexA76/evs.cc | 1 - src/arch/arm/fastmodel/CortexA76/evs.hh | 1 - src/arch/arm/fastmodel/CortexA76/thread_context.cc | 1 - src/arch/arm/fastmodel/CortexA76/thread_context.hh | 1 - src/arch/arm/fastmodel/CortexR52/cortex_r52.cc | 1 - src/arch/arm/fastmodel/CortexR52/cortex_r52.hh | 1 - src/arch/arm/fastmodel/CortexR52/evs.cc | 1 - src/arch/arm/fastmodel/CortexR52/evs.hh | 1 - src/arch/arm/fastmodel/CortexR52/thread_context.cc | 1 - src/arch/arm/fastmodel/CortexR52/thread_context.hh | 1 - src/arch/arm/fastmodel/GIC/gic.cc | 1 - src/arch/arm/fastmodel/GIC/gic.hh | 1 - src/arch/arm/fastmodel/PL330_DMAC/pl330.cc | 1 - src/arch/arm/fastmodel/PL330_DMAC/pl330.hh | 1 - src/arch/arm/fastmodel/amba_from_tlm_bridge.cc | 1 - src/arch/arm/fastmodel/amba_from_tlm_bridge.hh | 1 - src/arch/arm/fastmodel/amba_ports.hh | 1 - src/arch/arm/fastmodel/amba_to_tlm_bridge.cc | 1 - src/arch/arm/fastmodel/amba_to_tlm_bridge.hh | 1 - src/arch/arm/fastmodel/common/signal_receiver.hh | 1 - src/arch/arm/fastmodel/common/signal_sender.hh | 1 - 23 files changed, 23 deletions(-) diff --git a/src/arch/arm/fastmodel/CortexA76/cortex_a76.cc b/src/arch/arm/fastmodel/CortexA76/cortex_a76.cc index 9280a042ee..ea1f477f59 100644 --- a/src/arch/arm/fastmodel/CortexA76/cortex_a76.cc +++ b/src/arch/arm/fastmodel/CortexA76/cortex_a76.cc @@ -36,7 +36,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git 
a/src/arch/arm/fastmodel/CortexA76/cortex_a76.hh b/src/arch/arm/fastmodel/CortexA76/cortex_a76.hh index 39f916e4e3..61bf501554 100644 --- a/src/arch/arm/fastmodel/CortexA76/cortex_a76.hh +++ b/src/arch/arm/fastmodel/CortexA76/cortex_a76.hh @@ -42,7 +42,6 @@ namespace gem5 class BaseCPU; -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/CortexA76/evs.cc b/src/arch/arm/fastmodel/CortexA76/evs.cc index c9ce3cc656..b299ad1a28 100644 --- a/src/arch/arm/fastmodel/CortexA76/evs.cc +++ b/src/arch/arm/fastmodel/CortexA76/evs.cc @@ -37,7 +37,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/CortexA76/evs.hh b/src/arch/arm/fastmodel/CortexA76/evs.hh index 7c4ef601a7..9f08071dae 100644 --- a/src/arch/arm/fastmodel/CortexA76/evs.hh +++ b/src/arch/arm/fastmodel/CortexA76/evs.hh @@ -52,7 +52,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/CortexA76/thread_context.cc b/src/arch/arm/fastmodel/CortexA76/thread_context.cc index 672f3b724f..c6704852fc 100644 --- a/src/arch/arm/fastmodel/CortexA76/thread_context.cc +++ b/src/arch/arm/fastmodel/CortexA76/thread_context.cc @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/CortexA76/thread_context.hh b/src/arch/arm/fastmodel/CortexA76/thread_context.hh index d7b8ed541c..6e3d85485e 100644 --- a/src/arch/arm/fastmodel/CortexA76/thread_context.hh +++ b/src/arch/arm/fastmodel/CortexA76/thread_context.hh @@ -33,7 +33,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc b/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc index 9dfe7a5158..a22492e932 100644 --- a/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc +++ 
b/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/CortexR52/cortex_r52.hh b/src/arch/arm/fastmodel/CortexR52/cortex_r52.hh index 76c7d33ea4..186383d728 100644 --- a/src/arch/arm/fastmodel/CortexR52/cortex_r52.hh +++ b/src/arch/arm/fastmodel/CortexR52/cortex_r52.hh @@ -42,7 +42,6 @@ namespace gem5 class BaseCPU; -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/CortexR52/evs.cc b/src/arch/arm/fastmodel/CortexR52/evs.cc index 0ad3f18412..47fbc36313 100644 --- a/src/arch/arm/fastmodel/CortexR52/evs.cc +++ b/src/arch/arm/fastmodel/CortexR52/evs.cc @@ -36,7 +36,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/CortexR52/evs.hh b/src/arch/arm/fastmodel/CortexR52/evs.hh index 9cebec3846..6516f4c687 100644 --- a/src/arch/arm/fastmodel/CortexR52/evs.hh +++ b/src/arch/arm/fastmodel/CortexR52/evs.hh @@ -54,7 +54,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/CortexR52/thread_context.cc b/src/arch/arm/fastmodel/CortexR52/thread_context.cc index f3e170941e..a20f8e0a89 100644 --- a/src/arch/arm/fastmodel/CortexR52/thread_context.cc +++ b/src/arch/arm/fastmodel/CortexR52/thread_context.cc @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/CortexR52/thread_context.hh b/src/arch/arm/fastmodel/CortexR52/thread_context.hh index 7126a371a1..5a0d34f274 100644 --- a/src/arch/arm/fastmodel/CortexR52/thread_context.hh +++ b/src/arch/arm/fastmodel/CortexR52/thread_context.hh @@ -33,7 +33,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/GIC/gic.cc 
b/src/arch/arm/fastmodel/GIC/gic.cc index fbe863a166..493aa81fcd 100644 --- a/src/arch/arm/fastmodel/GIC/gic.cc +++ b/src/arch/arm/fastmodel/GIC/gic.cc @@ -34,7 +34,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/GIC/gic.hh b/src/arch/arm/fastmodel/GIC/gic.hh index 33a172ded2..0e502fc633 100644 --- a/src/arch/arm/fastmodel/GIC/gic.hh +++ b/src/arch/arm/fastmodel/GIC/gic.hh @@ -48,7 +48,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc b/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc index 13162bd409..f9e6e2dc3e 100644 --- a/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc +++ b/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh b/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh index 389f7047c7..e7811fc576 100644 --- a/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh +++ b/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh @@ -50,7 +50,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc b/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc index f84e58121e..4baf0ef7aa 100644 --- a/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc +++ b/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc @@ -34,7 +34,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh b/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh index 4484ea92c6..8ea8b8a731 100644 --- a/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh +++ b/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git 
a/src/arch/arm/fastmodel/amba_ports.hh b/src/arch/arm/fastmodel/amba_ports.hh index 845c5e97a4..8e3dca055e 100644 --- a/src/arch/arm/fastmodel/amba_ports.hh +++ b/src/arch/arm/fastmodel/amba_ports.hh @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc b/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc index e8807c474f..58f6eeab6b 100644 --- a/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc +++ b/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc @@ -68,7 +68,6 @@ struct FarAtomicOpFunctor : public AtomicOpFunctor } -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh b/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh index 6874052a56..addaac67f9 100644 --- a/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh +++ b/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/common/signal_receiver.hh b/src/arch/arm/fastmodel/common/signal_receiver.hh index 9ec760e32b..3036e3447d 100644 --- a/src/arch/arm/fastmodel/common/signal_receiver.hh +++ b/src/arch/arm/fastmodel/common/signal_receiver.hh @@ -44,7 +44,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { diff --git a/src/arch/arm/fastmodel/common/signal_sender.hh b/src/arch/arm/fastmodel/common/signal_sender.hh index c596ed108e..fb835c9fe6 100644 --- a/src/arch/arm/fastmodel/common/signal_sender.hh +++ b/src/arch/arm/fastmodel/common/signal_sender.hh @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel); namespace fastmodel { From d14cde6bd709e6d338ec1a1ae6082ec384ac21d0 Mon Sep 17 00:00:00 2001 From: "Daniel R. 
Carvalho" Date: Sat, 14 Jan 2023 09:55:26 -0300 Subject: [PATCH 130/492] misc: Remove the Linux namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: I73d7792ab8897d00b143d82d0fb70987ca410438 Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67364 Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Richard Cooper --- src/arch/generic/linux/threadinfo.hh | 1 - src/arch/mips/linux/hwrpb.hh | 1 - src/arch/mips/linux/thread_info.hh | 1 - src/kern/linux/events.cc | 1 - src/kern/linux/events.hh | 1 - src/kern/linux/helpers.hh | 1 - src/kern/linux/printk.cc | 1 - src/kern/linux/printk.hh | 1 - 8 files changed, 8 deletions(-) diff --git a/src/arch/generic/linux/threadinfo.hh b/src/arch/generic/linux/threadinfo.hh index 7702f0e0b9..70511c47fa 100644 --- a/src/arch/generic/linux/threadinfo.hh +++ b/src/arch/generic/linux/threadinfo.hh @@ -36,7 +36,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Linux, linux); namespace linux { diff --git a/src/arch/mips/linux/hwrpb.hh b/src/arch/mips/linux/hwrpb.hh index b5dcb18b77..3c5e439098 100644 --- a/src/arch/mips/linux/hwrpb.hh +++ b/src/arch/mips/linux/hwrpb.hh @@ -30,7 +30,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Linux, linux); namespace linux { struct pcb_struct diff --git a/src/arch/mips/linux/thread_info.hh b/src/arch/mips/linux/thread_info.hh index df376f0c11..986c896257 100644 --- a/src/arch/mips/linux/thread_info.hh +++ b/src/arch/mips/linux/thread_info.hh @@ -34,7 +34,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Linux, linux); namespace linux { struct thread_info diff --git a/src/kern/linux/events.cc b/src/kern/linux/events.cc index 6ec883c2e7..35767596af 100644 --- a/src/kern/linux/events.cc +++ b/src/kern/linux/events.cc @@ -54,7 +54,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Linux, linux); namespace linux { diff --git a/src/kern/linux/events.hh b/src/kern/linux/events.hh index 75492093f4..966c1ba075 
100644 --- a/src/kern/linux/events.hh +++ b/src/kern/linux/events.hh @@ -57,7 +57,6 @@ namespace gem5 class ThreadContext; -GEM5_DEPRECATED_NAMESPACE(Linux, linux); namespace linux { diff --git a/src/kern/linux/helpers.hh b/src/kern/linux/helpers.hh index 1ad5b413fb..b8d3c49a36 100644 --- a/src/kern/linux/helpers.hh +++ b/src/kern/linux/helpers.hh @@ -47,7 +47,6 @@ namespace gem5 class ThreadContext; -GEM5_DEPRECATED_NAMESPACE(Linux, linux); namespace linux { diff --git a/src/kern/linux/printk.cc b/src/kern/linux/printk.cc index c356016985..ccb1e8ab88 100644 --- a/src/kern/linux/printk.cc +++ b/src/kern/linux/printk.cc @@ -42,7 +42,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Linux, linux); namespace linux { diff --git a/src/kern/linux/printk.hh b/src/kern/linux/printk.hh index 7b545bc498..1e265a7f3d 100644 --- a/src/kern/linux/printk.hh +++ b/src/kern/linux/printk.hh @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Linux, linux); namespace linux { From b2bf811aeaf91ab5e30181f43d6966739294c327 Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 09:56:06 -0300 Subject: [PATCH 131/492] misc: Remove the FreeBSD namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: Ic0c838709121278584a295ea19a8283d5765b9c9 Signed-off-by: Daniel R. 
Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67365 Maintainer: Jason Lowe-Power Reviewed-by: Richard Cooper Tested-by: kokoro --- src/arch/generic/freebsd/threadinfo.hh | 1 - src/kern/freebsd/events.cc | 1 - src/kern/freebsd/events.hh | 1 - 3 files changed, 3 deletions(-) diff --git a/src/arch/generic/freebsd/threadinfo.hh b/src/arch/generic/freebsd/threadinfo.hh index f77772a878..443367f38d 100644 --- a/src/arch/generic/freebsd/threadinfo.hh +++ b/src/arch/generic/freebsd/threadinfo.hh @@ -39,7 +39,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FreeBSD, free_bsd); namespace free_bsd { diff --git a/src/kern/freebsd/events.cc b/src/kern/freebsd/events.cc index ce2291ed0c..667b10b49d 100644 --- a/src/kern/freebsd/events.cc +++ b/src/kern/freebsd/events.cc @@ -44,7 +44,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FreeBSD, free_bsd); namespace free_bsd { diff --git a/src/kern/freebsd/events.hh b/src/kern/freebsd/events.hh index c89ad0cad8..f4e350f11a 100644 --- a/src/kern/freebsd/events.hh +++ b/src/kern/freebsd/events.hh @@ -40,7 +40,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(FreeBSD, free_bsd); namespace free_bsd { From 1e80ba78627716824908340e83eb6711509fc332 Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 09:56:59 -0300 Subject: [PATCH 132/492] misc: Remove the Net namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: Ia2e1ef1619f51a0d7c0da9c7b4a160cd88ed8a65 Signed-off-by: Daniel R. 
Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67366 Tested-by: kokoro Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power --- src/base/inet.cc | 1 - src/base/inet.hh | 1 - 2 files changed, 2 deletions(-) diff --git a/src/base/inet.cc b/src/base/inet.cc index ab4bfe460c..fc7505ecb7 100644 --- a/src/base/inet.cc +++ b/src/base/inet.cc @@ -54,7 +54,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Net, networking); namespace networking { diff --git a/src/base/inet.hh b/src/base/inet.hh index 3897f6364c..2cc3c6a3c8 100644 --- a/src/base/inet.hh +++ b/src/base/inet.hh @@ -68,7 +68,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Net, networking); namespace networking { From 93f0de95d6db1ddcd79473d4d25f830c132316dd Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 09:57:55 -0300 Subject: [PATCH 133/492] misc: Remove the m5 namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: Iffc6d903da1d619c0914379d0ceabc88453b3ac7 Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67367 Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/base/coroutine.hh | 1 - src/base/stl_helpers.hh | 1 - 2 files changed, 2 deletions(-) diff --git a/src/base/coroutine.hh b/src/base/coroutine.hh index 63b26aa3eb..000a0bf0f9 100644 --- a/src/base/coroutine.hh +++ b/src/base/coroutine.hh @@ -44,7 +44,6 @@ #include "base/compiler.hh" #include "base/fiber.hh" -GEM5_DEPRECATED_NAMESPACE(m5, gem5); namespace gem5 { diff --git a/src/base/stl_helpers.hh b/src/base/stl_helpers.hh index d16446d5c3..d12f266350 100644 --- a/src/base/stl_helpers.hh +++ b/src/base/stl_helpers.hh @@ -36,7 +36,6 @@ #include "base/compiler.hh" -GEM5_DEPRECATED_NAMESPACE(m5, gem5); namespace gem5 { From 2ec3f64af80b50b0bb853d2225f020682e0b09f7 Mon Sep 17 00:00:00 2001 From: "Daniel R. 
Carvalho" Date: Sat, 14 Jan 2023 10:00:09 -0300 Subject: [PATCH 134/492] cpu: Remove the DecodeCache namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: Ia1b2ab5444464f7c0ee85c8d288e38be4d7c013f Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67368 Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/cpu/decode_cache.hh | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cpu/decode_cache.hh b/src/cpu/decode_cache.hh index 4e5631a460..cbd3c933b2 100644 --- a/src/cpu/decode_cache.hh +++ b/src/cpu/decode_cache.hh @@ -38,7 +38,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(DecodeCache, decode_cache); namespace decode_cache { From 161519177e0022a53daf02d20b1d1a0d9dc685ee Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 10:00:39 -0300 Subject: [PATCH 135/492] cpu: Remove the Minor namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: I603134248a05c988627bbd3c59c962b085b3b2ad Signed-off-by: Daniel R. 
Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67369 Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/cpu/minor/activity.cc | 1 - src/cpu/minor/activity.hh | 1 - src/cpu/minor/buffers.hh | 1 - src/cpu/minor/cpu.hh | 1 - src/cpu/minor/decode.cc | 1 - src/cpu/minor/decode.hh | 1 - src/cpu/minor/dyn_inst.cc | 1 - src/cpu/minor/dyn_inst.hh | 1 - src/cpu/minor/exec_context.hh | 1 - src/cpu/minor/execute.cc | 1 - src/cpu/minor/execute.hh | 1 - src/cpu/minor/fetch1.cc | 1 - src/cpu/minor/fetch1.hh | 1 - src/cpu/minor/fetch2.cc | 1 - src/cpu/minor/fetch2.hh | 1 - src/cpu/minor/func_unit.cc | 1 - src/cpu/minor/func_unit.hh | 1 - src/cpu/minor/lsq.cc | 1 - src/cpu/minor/lsq.hh | 1 - src/cpu/minor/pipe_data.cc | 1 - src/cpu/minor/pipe_data.hh | 1 - src/cpu/minor/pipeline.cc | 1 - src/cpu/minor/pipeline.hh | 1 - src/cpu/minor/scoreboard.cc | 1 - src/cpu/minor/scoreboard.hh | 1 - src/cpu/minor/stats.cc | 1 - src/cpu/minor/stats.hh | 1 - src/cpu/minor/trace.hh | 1 - 28 files changed, 28 deletions(-) diff --git a/src/cpu/minor/activity.cc b/src/cpu/minor/activity.cc index f78e927bce..f2f65b37f1 100644 --- a/src/cpu/minor/activity.cc +++ b/src/cpu/minor/activity.cc @@ -44,7 +44,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/activity.hh b/src/cpu/minor/activity.hh index b94221730a..d052e0f41c 100644 --- a/src/cpu/minor/activity.hh +++ b/src/cpu/minor/activity.hh @@ -50,7 +50,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/buffers.hh b/src/cpu/minor/buffers.hh index 648ec49336..e461a5cdaf 100644 --- a/src/cpu/minor/buffers.hh +++ b/src/cpu/minor/buffers.hh @@ -59,7 +59,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/cpu.hh b/src/cpu/minor/cpu.hh index b5b04ae908..acf4295ac9 100644 --- a/src/cpu/minor/cpu.hh +++ b/src/cpu/minor/cpu.hh @@ 
-56,7 +56,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/decode.cc b/src/cpu/minor/decode.cc index 53c02f321d..a4516a0c65 100644 --- a/src/cpu/minor/decode.cc +++ b/src/cpu/minor/decode.cc @@ -45,7 +45,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/decode.hh b/src/cpu/minor/decode.hh index 156b92038c..913e03f5c1 100644 --- a/src/cpu/minor/decode.hh +++ b/src/cpu/minor/decode.hh @@ -56,7 +56,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc index ac8f94835c..68415ecd09 100644 --- a/src/cpu/minor/dyn_inst.cc +++ b/src/cpu/minor/dyn_inst.cc @@ -50,7 +50,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/dyn_inst.hh b/src/cpu/minor/dyn_inst.hh index d9a85f9db6..9c6d6fd384 100644 --- a/src/cpu/minor/dyn_inst.hh +++ b/src/cpu/minor/dyn_inst.hh @@ -62,7 +62,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh index f3dc3ba3d3..33641f37a9 100644 --- a/src/cpu/minor/exec_context.hh +++ b/src/cpu/minor/exec_context.hh @@ -59,7 +59,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index 6eccec0be4..5eaaf5804e 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -57,7 +57,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/execute.hh b/src/cpu/minor/execute.hh index 8a8c26302f..0a1dde1424 100644 --- a/src/cpu/minor/execute.hh +++ b/src/cpu/minor/execute.hh @@ -59,7 +59,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc index 
daf8d560b3..dd427b7570 100644 --- a/src/cpu/minor/fetch1.cc +++ b/src/cpu/minor/fetch1.cc @@ -54,7 +54,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/fetch1.hh b/src/cpu/minor/fetch1.hh index e33eb0493b..f6a796ce82 100644 --- a/src/cpu/minor/fetch1.hh +++ b/src/cpu/minor/fetch1.hh @@ -58,7 +58,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc index 0ff0140518..b02294bfe6 100644 --- a/src/cpu/minor/fetch2.cc +++ b/src/cpu/minor/fetch2.cc @@ -52,7 +52,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/fetch2.hh b/src/cpu/minor/fetch2.hh index 85012bf927..26c3a5ad1f 100644 --- a/src/cpu/minor/fetch2.hh +++ b/src/cpu/minor/fetch2.hh @@ -57,7 +57,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/func_unit.cc b/src/cpu/minor/func_unit.cc index 428a8b3508..f508d58333 100644 --- a/src/cpu/minor/func_unit.cc +++ b/src/cpu/minor/func_unit.cc @@ -74,7 +74,6 @@ MinorFUTiming::MinorFUTiming( opClasses(params.opClasses) { } -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/func_unit.hh b/src/cpu/minor/func_unit.hh index 9400f91790..13ae01957a 100644 --- a/src/cpu/minor/func_unit.hh +++ b/src/cpu/minor/func_unit.hh @@ -198,7 +198,6 @@ class MinorFUPool : public SimObject { } }; -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc index f2fa5be115..4b31b26577 100644 --- a/src/cpu/minor/lsq.cc +++ b/src/cpu/minor/lsq.cc @@ -53,7 +53,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh index 4a95bf75c0..4d7c351e7a 100644 --- a/src/cpu/minor/lsq.hh +++ b/src/cpu/minor/lsq.hh @@ -58,7 +58,6 @@ namespace gem5 { 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/pipe_data.cc b/src/cpu/minor/pipe_data.cc index d7f113cfa3..3bda659de0 100644 --- a/src/cpu/minor/pipe_data.cc +++ b/src/cpu/minor/pipe_data.cc @@ -40,7 +40,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/pipe_data.hh b/src/cpu/minor/pipe_data.hh index 97651b4b46..15de50e984 100644 --- a/src/cpu/minor/pipe_data.hh +++ b/src/cpu/minor/pipe_data.hh @@ -57,7 +57,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/pipeline.cc b/src/cpu/minor/pipeline.cc index e94181fcd8..c914843ec1 100644 --- a/src/cpu/minor/pipeline.cc +++ b/src/cpu/minor/pipeline.cc @@ -51,7 +51,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/pipeline.hh b/src/cpu/minor/pipeline.hh index ce0ae07d3e..b1c85e37dd 100644 --- a/src/cpu/minor/pipeline.hh +++ b/src/cpu/minor/pipeline.hh @@ -57,7 +57,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc index 2bb69668a7..356fdc7e01 100644 --- a/src/cpu/minor/scoreboard.cc +++ b/src/cpu/minor/scoreboard.cc @@ -44,7 +44,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh index ac11533443..bccb9c5b80 100644 --- a/src/cpu/minor/scoreboard.hh +++ b/src/cpu/minor/scoreboard.hh @@ -56,7 +56,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc index 187687d00c..64d4c475e0 100644 --- a/src/cpu/minor/stats.cc +++ b/src/cpu/minor/stats.cc @@ -40,7 +40,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh index 47b9f0f30e..1ab81f4407 
100644 --- a/src/cpu/minor/stats.hh +++ b/src/cpu/minor/stats.hh @@ -51,7 +51,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { diff --git a/src/cpu/minor/trace.hh b/src/cpu/minor/trace.hh index 8a98764ae6..9617d05030 100644 --- a/src/cpu/minor/trace.hh +++ b/src/cpu/minor/trace.hh @@ -57,7 +57,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Minor, minor); namespace minor { From 5f5aae8940ed7255dfe1b4435ae10a30f2319c7a Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 10:02:16 -0300 Subject: [PATCH 136/492] dev: Remove a couple of deprecated namespaces These namespaces have gone through the deprecation period and can now be removed: Sinic, SCMI, Ps2, Regs, Keyboard, Mouse, TxdOp, iGbReg, CopyEngineReg. Change-Id: Icfaf458bffca2658650318508c0bb376719cf911 Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67370 Tested-by: kokoro Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power --- src/dev/arm/css/scmi_platform.hh | 1 - src/dev/arm/css/scmi_protocols.hh | 1 - src/dev/net/i8254xGBe_defs.hh | 2 -- src/dev/net/sinic.cc | 1 - src/dev/net/sinic.hh | 1 - src/dev/net/sinicreg.hh | 2 -- src/dev/pci/copy_engine_defs.hh | 1 - src/dev/ps2/types.cc | 1 - src/dev/ps2/types.hh | 3 --- 9 files changed, 13 deletions(-) diff --git a/src/dev/arm/css/scmi_platform.hh b/src/dev/arm/css/scmi_platform.hh index 581408dde2..92bec89408 100644 --- a/src/dev/arm/css/scmi_platform.hh +++ b/src/dev/arm/css/scmi_platform.hh @@ -49,7 +49,6 @@ namespace gem5 class Doorbell; -GEM5_DEPRECATED_NAMESPACE(SCMI, scmi); namespace scmi { diff --git a/src/dev/arm/css/scmi_protocols.hh b/src/dev/arm/css/scmi_protocols.hh index 03d6ea4f83..85e157baf2 100644 --- a/src/dev/arm/css/scmi_protocols.hh +++ b/src/dev/arm/css/scmi_protocols.hh @@ -46,7 +46,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(SCMI, scmi); namespace scmi { diff --git a/src/dev/net/i8254xGBe_defs.hh 
b/src/dev/net/i8254xGBe_defs.hh index 015ca7dee6..ef013a244f 100644 --- a/src/dev/net/i8254xGBe_defs.hh +++ b/src/dev/net/i8254xGBe_defs.hh @@ -35,7 +35,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(iGbReg, igbreg); namespace igbreg { @@ -239,7 +238,6 @@ struct TxDesc uint64_t d2; }; -GEM5_DEPRECATED_NAMESPACE(TxdOp, txd_op); namespace txd_op { diff --git a/src/dev/net/sinic.cc b/src/dev/net/sinic.cc index c1afb284c0..69a42edb88 100644 --- a/src/dev/net/sinic.cc +++ b/src/dev/net/sinic.cc @@ -48,7 +48,6 @@ namespace gem5 using namespace networking; -GEM5_DEPRECATED_NAMESPACE(Sinic, sinic); namespace sinic { diff --git a/src/dev/net/sinic.hh b/src/dev/net/sinic.hh index 2b0f9fa8cd..adad53b37f 100644 --- a/src/dev/net/sinic.hh +++ b/src/dev/net/sinic.hh @@ -45,7 +45,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Sinic, sinic); namespace sinic { diff --git a/src/dev/net/sinicreg.hh b/src/dev/net/sinicreg.hh index 120b9a194f..47588df71a 100644 --- a/src/dev/net/sinicreg.hh +++ b/src/dev/net/sinicreg.hh @@ -59,11 +59,9 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Sinic, sinic); namespace sinic { -GEM5_DEPRECATED_NAMESPACE(Regs, registers); namespace registers { diff --git a/src/dev/pci/copy_engine_defs.hh b/src/dev/pci/copy_engine_defs.hh index 9e687e3324..107edee77b 100644 --- a/src/dev/pci/copy_engine_defs.hh +++ b/src/dev/pci/copy_engine_defs.hh @@ -36,7 +36,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(CopyEngineReg, copy_engine_reg); namespace copy_engine_reg { diff --git a/src/dev/ps2/types.cc b/src/dev/ps2/types.cc index 99e740e246..00e442e209 100644 --- a/src/dev/ps2/types.cc +++ b/src/dev/ps2/types.cc @@ -45,7 +45,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Ps2, ps2); namespace ps2 { diff --git a/src/dev/ps2/types.hh b/src/dev/ps2/types.hh index 4ad7b05886..3286c97f57 100644 --- a/src/dev/ps2/types.hh +++ b/src/dev/ps2/types.hh @@ -53,7 +53,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(Ps2, ps2); namespace ps2 { @@ -70,7 +69,6 @@ enum Reset = 
0xFF, }; -GEM5_DEPRECATED_NAMESPACE(Keyboard, keyboard); namespace keyboard { @@ -93,7 +91,6 @@ extern const std::vector ID; } // namespace keyboard -GEM5_DEPRECATED_NAMESPACE(Mouse, mouse); namespace mouse { From c1c79615e0eeaaa5d5b2c4afd4444679d47d6ffc Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 10:07:08 -0300 Subject: [PATCH 137/492] sim: Remove the ProbePoints namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: Iddf30ea24a579cf5a94d6217c1d015a0c68d68d0 Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67371 Reviewed-by: Richard Cooper Tested-by: kokoro Maintainer: Jason Lowe-Power --- src/sim/probe/mem.hh | 1 - src/sim/probe/pmu.hh | 1 - src/sim/probe/probe.hh | 1 - 3 files changed, 3 deletions(-) diff --git a/src/sim/probe/mem.hh b/src/sim/probe/mem.hh index df3280cfc9..0496de9b23 100644 --- a/src/sim/probe/mem.hh +++ b/src/sim/probe/mem.hh @@ -46,7 +46,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ProbePoints, probing); namespace probing { diff --git a/src/sim/probe/pmu.hh b/src/sim/probe/pmu.hh index acf47501e2..b589ce7d4b 100644 --- a/src/sim/probe/pmu.hh +++ b/src/sim/probe/pmu.hh @@ -45,7 +45,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(ProbePoints, probing); namespace probing { diff --git a/src/sim/probe/probe.hh b/src/sim/probe/probe.hh index dede7adeb5..3dd428effd 100644 --- a/src/sim/probe/probe.hh +++ b/src/sim/probe/probe.hh @@ -86,7 +86,6 @@ struct ProbeListenerObjectParams; * common instrumentation interface for devices such as PMUs that have * different implementations in different ISAs. */ -GEM5_DEPRECATED_NAMESPACE(ProbePoints, probing); namespace probing { /* Note: This is only here for documentation purposes, new probe From c8e3708d8993d091a8fdec9f001bafc1c80c0fbe Mon Sep 17 00:00:00 2001 From: "Daniel R. 
Carvalho" Date: Sat, 14 Jan 2023 10:11:21 -0300 Subject: [PATCH 138/492] sim: Remove the Enums namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: If4daad57a421b076ae6661812c2255c7f06f30b9 Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67372 Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power Tested-by: kokoro --- build_tools/enum_cc.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/build_tools/enum_cc.py b/build_tools/enum_cc.py index cd192c56fb..5d82b401b2 100644 --- a/build_tools/enum_cc.py +++ b/build_tools/enum_cc.py @@ -97,8 +97,7 @@ const char *${name}Strings[static_cast(${name}::Num_${name})] = ) else: code( - """GEM5_DEPRECATED_NAMESPACE(Enums, enums); -namespace enums + """namespace enums {""" ) code.indent(1) From 31a1d485afcda352b1a9fc4f63a79d38399e22be Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 10:07:43 -0300 Subject: [PATCH 139/492] sim: Remove a couple of deprecated namespaces These namespaces have gone through the deprecation period and can now be removed: Int, Float, SimClock, PseudoInst, IntelMP Change-Id: Iec8e0fff021d8d7696e466e2ad52f2d51305d811 Signed-off-by: Daniel R. 
Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67373 Tested-by: kokoro Reviewed-by: Richard Cooper Maintainer: Jason Lowe-Power --- src/arch/x86/bios/intelmp.hh | 1 - src/arch/x86/fs_workload.hh | 1 - src/sim/core.cc | 3 --- src/sim/core.hh | 3 --- src/sim/pseudo_inst.cc | 1 - src/sim/pseudo_inst.hh | 1 - 6 files changed, 10 deletions(-) diff --git a/src/arch/x86/bios/intelmp.hh b/src/arch/x86/bios/intelmp.hh index 19f2f7a9be..207b4ab61f 100644 --- a/src/arch/x86/bios/intelmp.hh +++ b/src/arch/x86/bios/intelmp.hh @@ -84,7 +84,6 @@ uint8_t writeOutString(PortProxy& proxy, Addr addr, std::string str, namespace X86ISA { -GEM5_DEPRECATED_NAMESPACE(IntelMP, intelmp); namespace intelmp { diff --git a/src/arch/x86/fs_workload.hh b/src/arch/x86/fs_workload.hh index b40b69b3c4..5c1187cda4 100644 --- a/src/arch/x86/fs_workload.hh +++ b/src/arch/x86/fs_workload.hh @@ -63,7 +63,6 @@ class SMBiosTable; } // namespace smbios -GEM5_DEPRECATED_NAMESPACE(IntelMP, intelmp); namespace intelmp { diff --git a/src/sim/core.cc b/src/sim/core.cc index c388652fa6..d836b550be 100644 --- a/src/sim/core.cc +++ b/src/sim/core.cc @@ -41,13 +41,11 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(SimClock, sim_clock); namespace sim_clock { /// The simulated frequency of curTick(). 
(In ticks per second) Tick Frequency; -GEM5_DEPRECATED_NAMESPACE(Float, as_float); namespace as_float { double s; @@ -62,7 +60,6 @@ double MHz; double GHz; } // namespace as_float -GEM5_DEPRECATED_NAMESPACE(Int, as_int); namespace as_int { Tick s; diff --git a/src/sim/core.hh b/src/sim/core.hh index bd432c2d21..bac4e40003 100644 --- a/src/sim/core.hh +++ b/src/sim/core.hh @@ -46,12 +46,10 @@ namespace gem5 /// These are variables that are set based on the simulator frequency ///@{ -GEM5_DEPRECATED_NAMESPACE(SimClock, sim_clock); namespace sim_clock { extern Tick Frequency; ///< The number of ticks that equal one second -GEM5_DEPRECATED_NAMESPACE(Float, as_float); namespace as_float { @@ -81,7 +79,6 @@ extern double GHz; ///< GHz * * @{ */ -GEM5_DEPRECATED_NAMESPACE(Int, as_int); namespace as_int { extern Tick s; ///< second diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index 28b5619a16..55e44c7adc 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -76,7 +76,6 @@ namespace gem5 using namespace statistics; -GEM5_DEPRECATED_NAMESPACE(PseudoInst, pseudo_inst); namespace pseudo_inst { diff --git a/src/sim/pseudo_inst.hh b/src/sim/pseudo_inst.hh index 4794a41ffe..ba15370c55 100644 --- a/src/sim/pseudo_inst.hh +++ b/src/sim/pseudo_inst.hh @@ -55,7 +55,6 @@ namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(PseudoInst, pseudo_inst); namespace pseudo_inst { From 39bbd9c05e5d634027ff936732b5e2ba87f7538f Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 10:13:31 -0300 Subject: [PATCH 140/492] sim,arch: Remove the GuestABI namespace This namespace has gone through the deprecation period and can now be removed. Change-Id: I476815491314f4222da43da75c91654b4f3d1228 Signed-off-by: Daniel R. 
Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67374 Maintainer: Jason Lowe-Power Reviewed-by: Richard Cooper Tested-by: kokoro --- src/arch/arm/aapcs32.hh | 2 -- src/arch/arm/aapcs64.hh | 1 - src/arch/arm/freebsd/se_workload.hh | 1 - src/arch/arm/linux/se_workload.hh | 1 - src/arch/arm/reg_abi.hh | 1 - src/arch/arm/semihosting.cc | 1 - src/arch/arm/semihosting.hh | 1 - src/arch/mips/se_workload.hh | 1 - src/arch/power/se_workload.hh | 1 - src/arch/riscv/se_workload.hh | 1 - src/arch/sparc/pseudo_inst_abi.hh | 1 - src/arch/sparc/se_workload.hh | 1 - src/arch/x86/linux/linux.hh | 1 - src/arch/x86/linux/se_workload.hh | 1 - src/arch/x86/pseudo_inst_abi.hh | 1 - src/sim/guest_abi.test.cc | 1 - src/sim/guest_abi/definition.hh | 1 - src/sim/guest_abi/dispatch.hh | 1 - src/sim/guest_abi/layout.hh | 1 - src/sim/guest_abi/varargs.hh | 1 - src/sim/proxy_ptr.hh | 1 - src/sim/proxy_ptr.test.cc | 1 - src/sim/syscall_abi.hh | 1 - 23 files changed, 24 deletions(-) diff --git a/src/arch/arm/aapcs32.hh b/src/arch/arm/aapcs32.hh index 383b8eb36b..1d727e2634 100644 --- a/src/arch/arm/aapcs32.hh +++ b/src/arch/arm/aapcs32.hh @@ -70,7 +70,6 @@ struct Aapcs32 }; }; -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { @@ -446,7 +445,6 @@ struct Aapcs32Vfp : public Aapcs32 }; }; -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/arm/aapcs64.hh b/src/arch/arm/aapcs64.hh index 2f53822a70..62926d34b2 100644 --- a/src/arch/arm/aapcs64.hh +++ b/src/arch/arm/aapcs64.hh @@ -67,7 +67,6 @@ struct Aapcs64 }; }; -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/arm/freebsd/se_workload.hh b/src/arch/arm/freebsd/se_workload.hh index b944dbd8f9..47e41f2590 100644 --- a/src/arch/arm/freebsd/se_workload.hh +++ b/src/arch/arm/freebsd/se_workload.hh @@ -70,7 +70,6 @@ class EmuFreebsd : public SEWorkload } // namespace ArmISA -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); 
namespace guest_abi { diff --git a/src/arch/arm/linux/se_workload.hh b/src/arch/arm/linux/se_workload.hh index 0939af1dda..29bd30a6bf 100644 --- a/src/arch/arm/linux/se_workload.hh +++ b/src/arch/arm/linux/se_workload.hh @@ -62,7 +62,6 @@ class EmuLinux : public SEWorkload } // namespace ArmISA -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/arm/reg_abi.hh b/src/arch/arm/reg_abi.hh index 1d5272c66d..e892166c5e 100644 --- a/src/arch/arm/reg_abi.hh +++ b/src/arch/arm/reg_abi.hh @@ -51,7 +51,6 @@ struct RegABI64 : public GenericSyscallABI64 } // namespace ArmISA -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/arm/semihosting.cc b/src/arch/arm/semihosting.cc index 8efe841623..4ce52e8741 100644 --- a/src/arch/arm/semihosting.cc +++ b/src/arch/arm/semihosting.cc @@ -714,7 +714,6 @@ struct SemiPseudoAbi64 : public ArmSemihosting::Abi64 }; }; -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/arm/semihosting.hh b/src/arch/arm/semihosting.hh index fe7819cae9..557eb76636 100644 --- a/src/arch/arm/semihosting.hh +++ b/src/arch/arm/semihosting.hh @@ -599,7 +599,6 @@ class ArmSemihosting : public SimObject std::ostream &operator << ( std::ostream &os, const ArmSemihosting::InPlaceArg &ipa); -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/mips/se_workload.hh b/src/arch/mips/se_workload.hh index dc6f1dd5e3..18c0bda9c1 100644 --- a/src/arch/mips/se_workload.hh +++ b/src/arch/mips/se_workload.hh @@ -68,7 +68,6 @@ class SEWorkload : public gem5::SEWorkload } // namespace MipsISA -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/power/se_workload.hh b/src/arch/power/se_workload.hh index d041c45728..3c2bb936f4 100644 --- a/src/arch/power/se_workload.hh +++ b/src/arch/power/se_workload.hh @@ -68,7 +68,6 @@ class SEWorkload : public gem5::SEWorkload } // namespace 
PowerISA -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/riscv/se_workload.hh b/src/arch/riscv/se_workload.hh index 6f7c2edb70..9ae3be4c05 100644 --- a/src/arch/riscv/se_workload.hh +++ b/src/arch/riscv/se_workload.hh @@ -66,7 +66,6 @@ class SEWorkload : public gem5::SEWorkload } // namespace RiscvISA -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/sparc/pseudo_inst_abi.hh b/src/arch/sparc/pseudo_inst_abi.hh index 993e11bff3..989f0e7dfc 100644 --- a/src/arch/sparc/pseudo_inst_abi.hh +++ b/src/arch/sparc/pseudo_inst_abi.hh @@ -40,7 +40,6 @@ struct SparcPseudoInstABI using State = int; }; -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/sparc/se_workload.hh b/src/arch/sparc/se_workload.hh index 8cb373ac90..e0f7467fae 100644 --- a/src/arch/sparc/se_workload.hh +++ b/src/arch/sparc/se_workload.hh @@ -80,7 +80,6 @@ class SEWorkload : public gem5::SEWorkload } // namespace SparcISA -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/x86/linux/linux.hh b/src/arch/x86/linux/linux.hh index 0c34d09330..b9598224a6 100644 --- a/src/arch/x86/linux/linux.hh +++ b/src/arch/x86/linux/linux.hh @@ -77,7 +77,6 @@ class X86Linux : public Linux class SyscallABI {}; }; -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/x86/linux/se_workload.hh b/src/arch/x86/linux/se_workload.hh index f170776d82..d8d60e759f 100644 --- a/src/arch/x86/linux/se_workload.hh +++ b/src/arch/x86/linux/se_workload.hh @@ -96,7 +96,6 @@ class EmuLinux : public SEWorkload } // namespace X86ISA -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/arch/x86/pseudo_inst_abi.hh b/src/arch/x86/pseudo_inst_abi.hh index 05bf66f5f2..e465c7abd5 100644 --- a/src/arch/x86/pseudo_inst_abi.hh +++ b/src/arch/x86/pseudo_inst_abi.hh @@ -46,7 +46,6 @@ struct X86PseudoInstABI using 
State = int; }; -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/sim/guest_abi.test.cc b/src/sim/guest_abi.test.cc index 5b59874c3e..64f4122a51 100644 --- a/src/sim/guest_abi.test.cc +++ b/src/sim/guest_abi.test.cc @@ -98,7 +98,6 @@ struct TestABI_TcInit namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/sim/guest_abi/definition.hh b/src/sim/guest_abi/definition.hh index 2857b5b616..43aafecdc8 100644 --- a/src/sim/guest_abi/definition.hh +++ b/src/sim/guest_abi/definition.hh @@ -35,7 +35,6 @@ namespace gem5 class ThreadContext; -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/sim/guest_abi/dispatch.hh b/src/sim/guest_abi/dispatch.hh index 7ada2e2ac4..8846354e06 100644 --- a/src/sim/guest_abi/dispatch.hh +++ b/src/sim/guest_abi/dispatch.hh @@ -43,7 +43,6 @@ namespace gem5 class ThreadContext; -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/sim/guest_abi/layout.hh b/src/sim/guest_abi/layout.hh index 02681510d0..4d469b177c 100644 --- a/src/sim/guest_abi/layout.hh +++ b/src/sim/guest_abi/layout.hh @@ -38,7 +38,6 @@ namespace gem5 class ThreadContext; -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/sim/guest_abi/varargs.hh b/src/sim/guest_abi/varargs.hh index 9bb04786ff..1a34f2086c 100644 --- a/src/sim/guest_abi/varargs.hh +++ b/src/sim/guest_abi/varargs.hh @@ -39,7 +39,6 @@ namespace gem5 class ThreadContext; -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/sim/proxy_ptr.hh b/src/sim/proxy_ptr.hh index 03ab9472c9..5e766f7cc9 100644 --- a/src/sim/proxy_ptr.hh +++ b/src/sim/proxy_ptr.hh @@ -357,7 +357,6 @@ operator + (A a, const ProxyPtr &other) return other + a; } -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/sim/proxy_ptr.test.cc b/src/sim/proxy_ptr.test.cc index 
6f49d166e2..57fdb973bb 100644 --- a/src/sim/proxy_ptr.test.cc +++ b/src/sim/proxy_ptr.test.cc @@ -474,7 +474,6 @@ struct TestABI namespace gem5 { -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { diff --git a/src/sim/syscall_abi.hh b/src/sim/syscall_abi.hh index b91dc8ea95..90dbd9747b 100644 --- a/src/sim/syscall_abi.hh +++ b/src/sim/syscall_abi.hh @@ -75,7 +75,6 @@ struct GenericSyscallABI32 : public GenericSyscallABI } }; -GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi); namespace guest_abi { From 41b5276c1cedae275e189e3404818d105107aa0b Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Tue, 9 Aug 2022 10:47:28 +0100 Subject: [PATCH 141/492] cpu-o3: Remove obsolete getRegIds and getTrueId These have been obsolete since https://gem5-review.googlesource.com/c/public/gem5/+/49147, hence removing. Change-Id: I06f6c3058f652907d996b9e6267888e2d991622a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64332 Maintainer: Giacomo Travaglini Reviewed-by: Jason Lowe-Power Reviewed-by: Giacomo Travaglini Tested-by: kokoro --- src/cpu/o3/regfile.cc | 42 ------------------------------------------ src/cpu/o3/regfile.hh | 14 -------------- 2 files changed, 56 deletions(-) diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc index fecb891ca6..dcb8f704f0 100644 --- a/src/cpu/o3/regfile.cc +++ b/src/cpu/o3/regfile.cc @@ -175,47 +175,5 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList) freeList->addRegs(ccRegIds.begin(), ccRegIds.end()); } -PhysRegFile::IdRange -PhysRegFile::getRegIds(RegClassType cls) -{ - switch (cls) - { - case IntRegClass: - return std::make_pair(intRegIds.begin(), intRegIds.end()); - case FloatRegClass: - return std::make_pair(floatRegIds.begin(), floatRegIds.end()); - case VecRegClass: - return std::make_pair(vecRegIds.begin(), vecRegIds.end()); - case VecElemClass: - return std::make_pair(vecElemIds.begin(), vecElemIds.end()); - case VecPredRegClass: - return std::make_pair(vecPredRegIds.begin(), 
vecPredRegIds.end()); - case CCRegClass: - return std::make_pair(ccRegIds.begin(), ccRegIds.end()); - case MiscRegClass: - return std::make_pair(miscRegIds.begin(), miscRegIds.end()); - case InvalidRegClass: - panic("Tried to get register IDs for the invalid class."); - } - /* There is no way to make an empty iterator */ - return std::make_pair(PhysIds::iterator(), - PhysIds::iterator()); -} - -PhysRegIdPtr -PhysRegFile::getTrueId(PhysRegIdPtr reg) -{ - switch (reg->classValue()) { - case VecRegClass: - return &vecRegIds[reg->index()]; - case VecElemClass: - return &vecElemIds[reg->index()]; - default: - panic_if(!reg->is(VecElemClass), - "Trying to get the register of a %s register", reg->className()); - } - return nullptr; -} - } // namespace o3 } // namespace gem5 diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 3ddf1a2a79..0130c55625 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -309,20 +309,6 @@ class PhysRegFile panic("Unrecognized register class type %d.", type); } } - - /** - * Get the PhysRegIds of the elems of all vector registers. - * Auxiliary function to transition from Full vector mode to Elem mode - * and to initialise the rename map. - */ - IdRange getRegIds(RegClassType cls); - - /** - * Get the true physical register id. - * As many parts work with PhysRegIdPtr, we need to be able to produce - * the pointer out of just class and register idx. - */ - PhysRegIdPtr getTrueId(PhysRegIdPtr reg); }; } // namespace o3 From befa5baa78bce145e47ae4ef6a9e1b4da6e46978 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 31 Aug 2022 10:37:02 +0100 Subject: [PATCH 142/492] cpu-o3: print VecPredReg not VecReg Fix a DPRINTF to print the VecPredReg instead of the VecReg. 
Change-Id: Iaba255b6b9a98826ddcd67eb83b4169e1bf5056e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64342 Reviewed-by: Jason Lowe-Power Reviewed-by: Giacomo Travaglini Maintainer: Giacomo Travaglini Tested-by: kokoro --- src/cpu/o3/regfile.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 0130c55625..4fea589ad7 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -299,7 +299,7 @@ class PhysRegFile break; case VecPredRegClass: DPRINTF(IEW, "RegFile: Setting predicate register %i to %s\n", - idx, vectorRegFile.regClass.valString(val)); + idx, vecPredRegFile.regClass.valString(val)); vecPredRegFile.set(idx, val); break; case CCRegClass: From dd6595bf565b95fbb74e7d438030f48392becc20 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 3 Aug 2022 17:11:30 +0100 Subject: [PATCH 143/492] mem-cache: masked writes are not whole-line writes We now explicitly check in both the cache and the MSHRs if writes are masked or not before promoting to a whole-line write. Failure to do this previously was resulting in data loss when dirty data was present in lower level caches and a coincidentally aligned and cache-line-sized masked write occurred. 
Change-Id: I9434590d8b22e4d993167d789eb9d15a2e866bf1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64340 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/mem/cache/mshr.cc | 3 ++- src/mem/packet.hh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc index 871125a17e..b7e9357029 100644 --- a/src/mem/cache/mshr.cc +++ b/src/mem/cache/mshr.cc @@ -140,6 +140,7 @@ MSHR::TargetList::updateWriteFlags(PacketPtr pkt) Request::MEM_SWAP_COND | Request::SECURE | Request::LOCKED_RMW; const auto &req_flags = pkt->req->getFlags(); bool compat_write = !req_flags.isSet(no_merge_flags); + bool masked_write = pkt->isMaskedWrite(); // if this is the first write, it might be a whole // line write and even if we can't merge any @@ -147,7 +148,7 @@ MSHR::TargetList::updateWriteFlags(PacketPtr pkt) // it as a whole line write (e.g., SECURE whole line // write) bool first_write = empty(); - if (first_write || compat_write) { + if (!masked_write && (first_write || compat_write)) { auto offset = pkt->getOffset(blkSize); auto begin = writesBitmap.begin() + offset; std::fill(begin, begin + pkt->getSize(), true); diff --git a/src/mem/packet.hh b/src/mem/packet.hh index a80b918798..9d720fb9a0 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -625,7 +625,8 @@ class Packet : public Printable bool isWholeLineWrite(unsigned blk_size) { return (cmd == MemCmd::WriteReq || cmd == MemCmd::WriteLineReq) && - getOffset(blk_size) == 0 && getSize() == blk_size; + getOffset(blk_size) == 0 && getSize() == blk_size && + !isMaskedWrite(); } //@{ From fed81f34084ad46fc663ec236ce0e700881cc3c2 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Tue, 9 Aug 2022 09:37:47 +0100 Subject: [PATCH 144/492] arch,cpu: Add boilerplate support for matrix registers We add initial support for matrix registers to the CPU models and add stubs in each architecture. 
There are no implementations of matrix registers added, but this provides the basic support for using them in the future. Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289 Change-Id: I2ca6a21da932a58a801a0d08f0ad0cdca4968d02 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64333 Maintainer: Giacomo Travaglini Reviewed-by: Giacomo Travaglini Tested-by: kokoro --- src/arch/SConscript | 3 ++- src/arch/arm/isa.cc | 3 +++ src/arch/mips/isa.cc | 3 +++ src/arch/power/isa.cc | 3 +++ src/arch/riscv/isa.cc | 3 +++ src/arch/sparc/isa.cc | 3 +++ src/arch/x86/isa.cc | 3 +++ src/cpu/StaticInstFlags.py | 1 + src/cpu/minor/scoreboard.cc | 4 ++++ src/cpu/minor/scoreboard.hh | 5 ++++- src/cpu/o3/BaseO3CPU.py | 1 + src/cpu/o3/cpu.cc | 3 +++ src/cpu/o3/inst_queue.cc | 1 + src/cpu/o3/regfile.cc | 18 ++++++++++++++++++ src/cpu/o3/regfile.hh | 22 ++++++++++++++++++++++ src/cpu/o3/rename.cc | 9 ++++++++- src/cpu/o3/rename.hh | 1 + src/cpu/reg_class.hh | 2 ++ src/cpu/simple/base.cc | 6 ++++++ src/cpu/simple/exec_context.hh | 16 ++++++++++++++++ src/cpu/simple_thread.cc | 1 + src/cpu/simple_thread.hh | 1 + src/cpu/static_inst.hh | 1 + src/cpu/thread_context.cc | 14 ++++++++++++++ 24 files changed, 124 insertions(+), 3 deletions(-) diff --git a/src/arch/SConscript b/src/arch/SConscript index 90d7ad7700..7285c0ec59 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -231,10 +231,11 @@ DebugFlag('IntRegs') DebugFlag('FloatRegs') DebugFlag('VecRegs') DebugFlag('VecPredRegs') +DebugFlag('MatRegs') DebugFlag('CCRegs') DebugFlag('MiscRegs') CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'VecRegs', 'VecPredRegs', - 'CCRegs', 'MiscRegs' ]) + 'MatRegs', 'CCRegs', 'MiscRegs' ]) DebugFlag('Decoder', "Decoder debug output") DebugFlag('Faults', "Information about faults, exceptions, interrupts, etc") diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index 543e0eba7b..617f144bae 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -54,6 +54,7 @@ #include 
"cpu/reg_class.hh" #include "debug/Arm.hh" #include "debug/LLSC.hh" +#include "debug/MatRegs.hh" #include "debug/VecPredRegs.hh" #include "debug/VecRegs.hh" #include "dev/arm/generic_timer.hh" @@ -75,6 +76,7 @@ namespace /* Not applicable to ARM */ RegClass floatRegClass(FloatRegClass, FloatRegClassName, 0, debug::FloatRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); } // anonymous namespace @@ -86,6 +88,7 @@ ISA::ISA(const Params &p) : BaseISA(p), system(NULL), _regClasses.push_back(&vecRegClass); _regClasses.push_back(&vecElemClass); _regClasses.push_back(&vecPredRegClass); + _regClasses.push_back(&matRegClass); _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); diff --git a/src/arch/mips/isa.cc b/src/arch/mips/isa.cc index 6f39a81244..92799ab291 100644 --- a/src/arch/mips/isa.cc +++ b/src/arch/mips/isa.cc @@ -38,6 +38,7 @@ #include "cpu/base.hh" #include "cpu/reg_class.hh" #include "cpu/thread_context.hh" +#include "debug/MatRegs.hh" #include "debug/MipsPRA.hh" #include "params/MipsISA.hh" @@ -104,6 +105,7 @@ constexpr RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); constexpr RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); +constexpr RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); constexpr RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace @@ -116,6 +118,7 @@ ISA::ISA(const Params &p) : BaseISA(p), numThreads(p.num_threads), _regClasses.push_back(&vecRegClass); _regClasses.push_back(&vecElemClass); _regClasses.push_back(&vecPredRegClass); + _regClasses.push_back(&matRegClass); _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); diff --git a/src/arch/power/isa.cc b/src/arch/power/isa.cc index 80c984cfc4..ecaebade9a 100644 --- a/src/arch/power/isa.cc +++ b/src/arch/power/isa.cc @@ -41,6 +41,7 @@ #include "arch/power/regs/int.hh" #include "arch/power/regs/misc.hh" 
#include "cpu/thread_context.hh" +#include "debug/MatRegs.hh" #include "params/PowerISA.hh" namespace gem5 @@ -56,6 +57,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace @@ -67,6 +69,7 @@ ISA::ISA(const Params &p) : BaseISA(p) _regClasses.push_back(&vecRegClass); _regClasses.push_back(&vecElemClass); _regClasses.push_back(&vecPredRegClass); + _regClasses.push_back(&matRegClass); _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); clear(); diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index 3b4f378afa..6e4c380d98 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -48,6 +48,7 @@ #include "cpu/base.hh" #include "debug/Checkpoint.hh" #include "debug/LLSC.hh" +#include "debug/MatRegs.hh" #include "debug/RiscvMisc.hh" #include "mem/packet.hh" #include "mem/request.hh" @@ -235,6 +236,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace @@ -247,6 +249,7 @@ ISA::ISA(const Params &p) : _regClasses.push_back(&vecRegClass); _regClasses.push_back(&vecElemClass); _regClasses.push_back(&vecPredRegClass); + _regClasses.push_back(&matRegClass); _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); diff --git a/src/arch/sparc/isa.cc b/src/arch/sparc/isa.cc index 255dbb0b09..38b3d1c3e2 100644 --- a/src/arch/sparc/isa.cc +++ 
b/src/arch/sparc/isa.cc @@ -39,6 +39,7 @@ #include "base/trace.hh" #include "cpu/base.hh" #include "cpu/thread_context.hh" +#include "debug/MatRegs.hh" #include "debug/Timer.hh" #include "params/SparcISA.hh" @@ -73,6 +74,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace @@ -84,6 +86,7 @@ ISA::ISA(const Params &p) : BaseISA(p) _regClasses.push_back(&vecRegClass); _regClasses.push_back(&vecElemClass); _regClasses.push_back(&vecPredRegClass); + _regClasses.push_back(&matRegClass); _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); diff --git a/src/arch/x86/isa.cc b/src/arch/x86/isa.cc index 6578b1c716..31efae3a43 100644 --- a/src/arch/x86/isa.cc +++ b/src/arch/x86/isa.cc @@ -37,6 +37,7 @@ #include "base/compiler.hh" #include "cpu/base.hh" #include "cpu/thread_context.hh" +#include "debug/MatRegs.hh" #include "params/X86ISA.hh" #include "sim/serialize.hh" @@ -146,6 +147,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); } // anonymous namespace @@ -159,6 +161,7 @@ ISA::ISA(const X86ISAParams &p) : BaseISA(p), vendorString(p.vendor_string) _regClasses.push_back(&vecRegClass); _regClasses.push_back(&vecElemClass); _regClasses.push_back(&vecPredRegClass); + _regClasses.push_back(&matRegClass); _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); diff --git a/src/cpu/StaticInstFlags.py b/src/cpu/StaticInstFlags.py index 
b7e03a6fb9..d562dd5645 100644 --- a/src/cpu/StaticInstFlags.py +++ b/src/cpu/StaticInstFlags.py @@ -52,6 +52,7 @@ class StaticInstFlags(Enum): "IsFloating", # References FP regs. "IsVector", # References Vector regs. "IsVectorElem", # References Vector reg elems. + "IsMatrix", # References Matrix regs. "IsLoad", # Reads from memory (load or prefetch). "IsStore", # Writes to memory. "IsAtomic", # Does atomic RMW to memory. diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc index 356fdc7e01..475d650d3a 100644 --- a/src/cpu/minor/scoreboard.cc +++ b/src/cpu/minor/scoreboard.cc @@ -70,6 +70,10 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index) scoreboard_index = vecPredRegOffset + reg.index(); ret = true; break; + case MatRegClass: + scoreboard_index = matRegOffset + reg.index(); + ret = true; + break; case CCRegClass: scoreboard_index = ccRegOffset + reg.index(); ret = true; diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh index bccb9c5b80..d3df324b99 100644 --- a/src/cpu/minor/scoreboard.hh +++ b/src/cpu/minor/scoreboard.hh @@ -72,6 +72,7 @@ class Scoreboard : public Named const unsigned ccRegOffset; const unsigned vecRegOffset; const unsigned vecPredRegOffset; + const unsigned matRegOffset; /** The number of registers in the Scoreboard. 
These * are just the integer, CC and float registers packed @@ -116,7 +117,9 @@ class Scoreboard : public Named vecRegOffset(ccRegOffset + reg_classes.at(CCRegClass)->numRegs()), vecPredRegOffset(vecRegOffset + reg_classes.at(VecElemClass)->numRegs()), - numRegs(vecPredRegOffset + reg_classes.at(VecPredRegClass)->numRegs()), + matRegOffset(vecPredRegOffset + + reg_classes.at(VecPredRegClass)->numRegs()), + numRegs(matRegOffset + reg_classes.at(MatRegClass)->numRegs()), numResults(numRegs, 0), numUnpredictableResults(numRegs, 0), fuIndices(numRegs, invalidFUIndex), diff --git a/src/cpu/o3/BaseO3CPU.py b/src/cpu/o3/BaseO3CPU.py index 07d9df6b7f..2e1a602e4c 100644 --- a/src/cpu/o3/BaseO3CPU.py +++ b/src/cpu/o3/BaseO3CPU.py @@ -168,6 +168,7 @@ class BaseO3CPU(BaseCPU): numPhysVecPredRegs = Param.Unsigned( 32, "Number of physical predicate registers" ) + numPhysMatRegs = Param.Unsigned(2, "Number of physical matrix registers") # most ISAs don't use condition-code regs, so default is 0 numPhysCCRegs = Param.Unsigned(0, "Number of physical cc registers") numIQEntries = Param.Unsigned(64, "Number of instruction queue entries") diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 48ccd94b54..d2bacaa523 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -90,6 +90,7 @@ CPU::CPU(const BaseO3CPUParams ¶ms) params.numPhysFloatRegs, params.numPhysVecRegs, params.numPhysVecPredRegs, + params.numPhysMatRegs, params.numPhysCCRegs, params.isa[0]->regClasses()), @@ -200,6 +201,8 @@ CPU::CPU(const BaseO3CPUParams ¶ms) numThreads * regClasses.at(VecRegClass)->numRegs()); assert(params.numPhysVecPredRegs >= numThreads * regClasses.at(VecPredRegClass)->numRegs()); + assert(params.numPhysMatRegs >= + numThreads * regClasses.at(MatRegClass)->numRegs()); assert(params.numPhysCCRegs >= numThreads * regClasses.at(CCRegClass)->numRegs()); diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc index 72cb7356ef..ee286fc585 100644 --- a/src/cpu/o3/inst_queue.cc +++ 
b/src/cpu/o3/inst_queue.cc @@ -108,6 +108,7 @@ InstructionQueue::InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr, reg_classes.at(VecElemClass)->numRegs() / reg_classes.at(VecRegClass)->numRegs()) + params.numPhysVecPredRegs + + params.numPhysMatRegs + params.numPhysCCRegs; //Create an entry for each physical register within the diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc index dcb8f704f0..1bc7032ebd 100644 --- a/src/cpu/o3/regfile.cc +++ b/src/cpu/o3/regfile.cc @@ -53,6 +53,7 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, unsigned _numPhysicalVecRegs, unsigned _numPhysicalVecPredRegs, + unsigned _numPhysicalMatRegs, unsigned _numPhysicalCCRegs, const BaseISA::RegClasses ®_classes) : intRegFile(*reg_classes.at(IntRegClass), _numPhysicalIntRegs), @@ -63,6 +64,7 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, reg_classes.at(VecRegClass)->numRegs())), vecPredRegFile(*reg_classes.at(VecPredRegClass), _numPhysicalVecPredRegs), + matRegFile(*reg_classes.at(MatRegClass), _numPhysicalMatRegs), ccRegFile(*reg_classes.at(CCRegClass), _numPhysicalCCRegs), numPhysicalIntRegs(_numPhysicalIntRegs), numPhysicalFloatRegs(_numPhysicalFloatRegs), @@ -71,12 +73,14 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, reg_classes.at(VecElemClass)->numRegs() / reg_classes.at(VecRegClass)->numRegs())), numPhysicalVecPredRegs(_numPhysicalVecPredRegs), + numPhysicalMatRegs(_numPhysicalMatRegs), numPhysicalCCRegs(_numPhysicalCCRegs), totalNumRegs(_numPhysicalIntRegs + _numPhysicalFloatRegs + _numPhysicalVecRegs + numPhysicalVecElemRegs + _numPhysicalVecPredRegs + + _numPhysicalMatRegs + _numPhysicalCCRegs) { RegIndex phys_reg; @@ -115,6 +119,13 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, flat_reg_idx++); } + // The next batch of the registers are the matrix physical + // registers; put them onto the matrix free list. 
+ for (phys_reg = 0; phys_reg < numPhysicalMatRegs; phys_reg++) { + matRegIds.emplace_back(*reg_classes.at(MatRegClass), phys_reg, + flat_reg_idx++); + } + // The rest of the registers are the condition-code physical // registers; put them onto the condition-code free list. for (phys_reg = 0; phys_reg < numPhysicalCCRegs; phys_reg++) { @@ -167,6 +178,13 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList) } freeList->addRegs(vecPredRegIds.begin(), vecPredRegIds.end()); + /* The next batch of the registers are the matrix physical + * registers; put them onto the matrix free list. */ + for (reg_idx = 0; reg_idx < numPhysicalMatRegs; reg_idx++) { + assert(matRegIds[reg_idx].index() == reg_idx); + } + freeList->addRegs(matRegIds.begin(), matRegIds.end()); + // The rest of the registers are the condition-code physical // registers; put them onto the condition-code free list. for (reg_idx = 0; reg_idx < numPhysicalCCRegs; reg_idx++) { diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 4fea589ad7..13c9899f13 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -91,6 +91,10 @@ class PhysRegFile RegFile vecPredRegFile; std::vector vecPredRegIds; + /** Matrix register file. */ + RegFile matRegFile; + std::vector matRegIds; + /** Condition-code register file. 
*/ RegFile ccRegFile; std::vector ccRegIds; @@ -123,6 +127,11 @@ class PhysRegFile */ unsigned numPhysicalVecPredRegs; + /** + * Number of physical matrix registers + */ + unsigned numPhysicalMatRegs; + /** * Number of physical CC registers */ @@ -140,6 +149,7 @@ class PhysRegFile unsigned _numPhysicalFloatRegs, unsigned _numPhysicalVecRegs, unsigned _numPhysicalVecPredRegs, + unsigned _numPhysicalMatRegs, unsigned _numPhysicalCCRegs, const BaseISA::RegClasses &classes); @@ -218,6 +228,11 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Access to predicate register %i, has " "data %s\n", idx, vecPredRegFile.regClass.valString(val)); break; + case MatRegClass: + matRegFile.get(idx, val); + DPRINTF(IEW, "RegFile: Access to matrix register %i, has " + "data %s\n", idx, matRegFile.regClass.valString(val)); + break; case CCRegClass: *(RegVal *)val = getReg(phys_reg); break; @@ -237,6 +252,8 @@ class PhysRegFile return vectorRegFile.ptr(idx); case VecPredRegClass: return vecPredRegFile.ptr(idx); + case MatRegClass: + return matRegFile.ptr(idx); default: panic("Unrecognized register class type %d.", type); } @@ -302,6 +319,11 @@ class PhysRegFile idx, vecPredRegFile.regClass.valString(val)); vecPredRegFile.set(idx, val); break; + case MatRegClass: + DPRINTF(IEW, "RegFile: Setting matrix register %i to %s\n", + idx, matRegFile.regClass.valString(val)); + matRegFile.set(idx, val); + break; case CCRegClass: setReg(phys_reg, *(RegVal *)val); break; diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc index f3783d402a..f8c305eb1c 100644 --- a/src/cpu/o3/rename.cc +++ b/src/cpu/o3/rename.cc @@ -134,6 +134,8 @@ Rename::RenameStats::RenameStats(statistics::Group *parent) "Number of vector rename lookups"), ADD_STAT(vecPredLookups, statistics::units::Count::get(), "Number of vector predicate rename lookups"), + ADD_STAT(matLookups, statistics::units::Count::get(), + "Number of matrix rename lookups"), ADD_STAT(committedMaps, statistics::units::Count::get(), "Number of HB maps that 
are committed"), ADD_STAT(undoneMaps, statistics::units::Count::get(), @@ -167,6 +169,7 @@ Rename::RenameStats::RenameStats(statistics::Group *parent) fpLookups.prereq(fpLookups); vecLookups.prereq(vecLookups); vecPredLookups.prereq(vecPredLookups); + matLookups.prereq(matLookups); committedMaps.prereq(committedMaps); undoneMaps.prereq(undoneMaps); @@ -1034,6 +1037,9 @@ Rename::renameSrcRegs(const DynInstPtr &inst, ThreadID tid) case VecPredRegClass: stats.vecPredLookups++; break; + case MatRegClass: + stats.matLookups++; + break; case CCRegClass: case MiscRegClass: break; @@ -1248,7 +1254,7 @@ Rename::readFreeEntries(ThreadID tid) } DPRINTF(Rename, "[tid:%i] Free IQ: %i, Free ROB: %i, " - "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i %i %i)\n", + "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i %i %i %i)\n", tid, freeEntries[tid].iqEntries, freeEntries[tid].robEntries, @@ -1260,6 +1266,7 @@ Rename::readFreeEntries(ThreadID tid) renameMap[tid]->numFreeEntries(VecRegClass), renameMap[tid]->numFreeEntries(VecElemClass), renameMap[tid]->numFreeEntries(VecPredRegClass), + renameMap[tid]->numFreeEntries(MatRegClass), renameMap[tid]->numFreeEntries(CCRegClass)); DPRINTF(Rename, "[tid:%i] %i instructions not yet in ROB\n", diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 0b42b6eaa0..61ef476501 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -521,6 +521,7 @@ class Rename statistics::Scalar fpLookups; statistics::Scalar vecLookups; statistics::Scalar vecPredLookups; + statistics::Scalar matLookups; /** Stat for total number of committed renaming mappings. */ statistics::Scalar committedMaps; /** Stat for total number of mappings that were undone due to a diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh index 080c758413..37618e530a 100644 --- a/src/cpu/reg_class.hh +++ b/src/cpu/reg_class.hh @@ -64,6 +64,7 @@ enum RegClassType /** Vector Register Native Elem lane. 
*/ VecElemClass, VecPredRegClass, + MatRegClass, ///< Matrix Register CCRegClass, ///< Condition-code register MiscRegClass, ///< Control (misc) register InvalidRegClass = -1 @@ -75,6 +76,7 @@ inline constexpr char FloatRegClassName[] = "floating_point"; inline constexpr char VecRegClassName[] = "vector"; inline constexpr char VecElemClassName[] = "vector_element"; inline constexpr char VecPredRegClassName[] = "vector_predicate"; +inline constexpr char MatRegClassName[] = "matrix"; inline constexpr char CCRegClassName[] = "condition_code"; inline constexpr char MiscRegClassName[] = "miscellaneous"; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index ab67f39496..768f63ede5 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -418,6 +418,12 @@ BaseSimpleCPU::postExecute() t_info.execContextStats.numVecInsts++; } + //Matrix alu accesses + if (curStaticInst->isMatrix()){ + t_info.execContextStats.numMatAluAccesses++; + t_info.execContextStats.numMatInsts++; + } + //number of function calls/returns to get window accesses if (curStaticInst->isCall() || curStaticInst->isReturn()){ t_info.execContextStats.numCallsReturns++; diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index e51ec88dce..0f20763f28 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -96,6 +96,8 @@ class SimpleExecContext : public ExecContext "Number of float alu accesses"), ADD_STAT(numVecAluAccesses, statistics::units::Count::get(), "Number of vector alu accesses"), + ADD_STAT(numMatAluAccesses, statistics::units::Count::get(), + "Number of matrix alu accesses"), ADD_STAT(numCallsReturns, statistics::units::Count::get(), "Number of times a function call or return occured"), ADD_STAT(numCondCtrlInsts, statistics::units::Count::get(), @@ -106,6 +108,8 @@ class SimpleExecContext : public ExecContext "Number of float instructions"), ADD_STAT(numVecInsts, statistics::units::Count::get(), "Number of vector 
instructions"), + ADD_STAT(numMatInsts, statistics::units::Count::get(), + "Number of matrix instructions"), ADD_STAT(numIntRegReads, statistics::units::Count::get(), "Number of times the integer registers were read"), ADD_STAT(numIntRegWrites, statistics::units::Count::get(), @@ -162,6 +166,7 @@ class SimpleExecContext : public ExecContext &numVecRegReads, &numVecRegReads, &numVecPredRegReads, + &numMatRegReads, &numCCRegReads }, numRegWrites{ @@ -170,6 +175,7 @@ class SimpleExecContext : public ExecContext &numVecRegWrites, &numVecRegWrites, &numVecPredRegWrites, + &numMatRegWrites, &numCCRegWrites } { @@ -220,6 +226,9 @@ class SimpleExecContext : public ExecContext // Number of vector alu accesses statistics::Scalar numVecAluAccesses; + // Number of matrix alu accesses + statistics::Scalar numMatAluAccesses; + // Number of function calls/returns statistics::Scalar numCallsReturns; @@ -235,6 +244,9 @@ class SimpleExecContext : public ExecContext // Number of vector instructions statistics::Scalar numVecInsts; + // Number of matrix instructions + statistics::Scalar numMatInsts; + // Number of integer register file accesses statistics::Scalar numIntRegReads; statistics::Scalar numIntRegWrites; @@ -251,6 +263,10 @@ class SimpleExecContext : public ExecContext mutable statistics::Scalar numVecPredRegReads; statistics::Scalar numVecPredRegWrites; + // Number of matrix register file accesses + mutable statistics::Scalar numMatRegReads; + statistics::Scalar numMatRegWrites; + // Number of condition code register file accesses statistics::Scalar numCCRegReads; statistics::Scalar numCCRegWrites; diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc index 4c4e7dcdb6..c28359a4ed 100644 --- a/src/cpu/simple_thread.cc +++ b/src/cpu/simple_thread.cc @@ -75,6 +75,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys, {*_isa->regClasses().at(VecRegClass)}, {*_isa->regClasses().at(VecElemClass)}, {*_isa->regClasses().at(VecPredRegClass)}, + 
{*_isa->regClasses().at(MatRegClass)}, {*_isa->regClasses().at(CCRegClass)} }}, isa(_isa), diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index 5a60d2ac16..b9129734f1 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -57,6 +57,7 @@ #include "debug/CCRegs.hh" #include "debug/FloatRegs.hh" #include "debug/IntRegs.hh" +#include "debug/MatRegs.hh" #include "debug/VecPredRegs.hh" #include "debug/VecRegs.hh" #include "mem/htm.hh" diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index 3ab78345bc..7ecc57d2f0 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -155,6 +155,7 @@ class StaticInst : public RefCounted, public StaticInstFlags bool isInteger() const { return flags[IsInteger]; } bool isFloating() const { return flags[IsFloating]; } bool isVector() const { return flags[IsVector]; } + bool isMatrix() const { return flags[IsMatrix]; } bool isControl() const { return flags[IsControl]; } bool isCall() const { return flags[IsCall]; } diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc index 69094f87af..146f9e74d0 100644 --- a/src/cpu/thread_context.cc +++ b/src/cpu/thread_context.cc @@ -109,6 +109,20 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two) } } + // Then loop through the matrix registers. 
+ const auto *mat_class = regClasses.at(MatRegClass); + std::vector mat1(mat_class->regBytes()); + std::vector mat2(mat_class->regBytes()); + for (auto &id: *regClasses.at(MatRegClass)) { + one->getReg(id, mat1.data()); + two->getReg(id, mat2.data()); + if (mat1 != mat2) { + panic("Mat reg idx %d doesn't match, one: %#x, two: %#x", + id.index(), mat_class->valString(mat1.data()), + mat_class->valString(mat2.data())); + } + } + for (int i = 0; i < regClasses.at(MiscRegClass)->numRegs(); ++i) { RegVal t1 = one->readMiscRegNoEffect(i); RegVal t2 = two->readMiscRegNoEffect(i); From 5c43523d53d0fbada0f48d1cefbcadd9959d8068 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Tue, 9 Aug 2022 16:42:01 +0100 Subject: [PATCH 145/492] arch-arm: Add matrix register support for SME We add support for the matrix registers to the Arm architecture. This will be used to implement support for Arm's Scalable Matrix Extension (SME) in subsequent commits. We add an implementation of a matrix register for the Arm architecture. These are akin to 2D vector registers in the sense that they can be dynamically viewed as a variety of element sizes. As widening the element size would reduce the matrix size by a factor of element size, we instead layer multiple tiles of wider elements onto the underlying matrix storage in order to retain square matrices. We separate the storage of the matrix from the different views one can have. The potential views are: * Tiles: View the matrix as one or more tiles using a specified element size. As the element size increases the number of indexable tiles increases. When using the smallest granularity element size (bytes) there is a single tile. As an example, using 32-bit elements yields 4 tiles. Tiles are interleaved onto the underlaying matrix modulo element size. A tile supports 2D indexing ([][]), with the first index specifying the row index, and the second the column (element index within the row). 
* A Horizontal/Vertical slice (row or a column) of a tile: Take the aforementioned tile, and extract a specified row or column slice from it. A slice supports standard []-based indexing. A tile slice must use the same underlying element type as is used for the tile. * A Horizontal/Vertical slice (row or column) of the underlying matrix storage: Treat the matrix register as an array of vectors (rows or columns, rows preferred due to them being independent of the element size being used). On simulator start-up the matrix registers are initialised to a maximum size. At run-time the used size can be dynamically adjusted. However, please note that as the matrix register class doesn't know if a smaller size is being used, the class itself doesn't do any bounds checking. This is left to the user. Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289 Change-Id: I6a6a05154846e4802e9822bbbac00ab2c39538ed Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64334 Reviewed-by: Giacomo Travaglini Maintainer: Giacomo Travaglini Tested-by: kokoro --- src/arch/arm/SConscript | 1 + src/arch/arm/isa.cc | 7 +- src/arch/arm/isa.hh | 3 +- src/arch/arm/matrix.hh | 572 ++++++++++++++++++++++++++++++++++++ src/arch/arm/matrix.test.cc | 453 ++++++++++++++++++++++++++++ src/arch/arm/regs/mat.hh | 136 +++++++++ src/arch/arm/types.hh | 12 + 7 files changed, 1182 insertions(+), 2 deletions(-) create mode 100644 src/arch/arm/matrix.hh create mode 100644 src/arch/arm/matrix.test.cc create mode 100644 src/arch/arm/regs/mat.hh diff --git a/src/arch/arm/SConscript b/src/arch/arm/SConscript index 6c359fb255..935f082c11 100644 --- a/src/arch/arm/SConscript +++ b/src/arch/arm/SConscript @@ -54,6 +54,7 @@ if env['USE_ARM_ISA']: '../../cpu/reg_class.cc', '../../sim/bufval.cc', '../../sim/cur_tick.cc', 'regs/int.cc') + GTest('matrix.test', 'matrix.test.cc') Source('decoder.cc', tags='arm isa') Source('faults.cc', tags='arm isa') Source('htm.cc', tags='arm isa') diff --git
a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index 617f144bae..c6bb2bd8d1 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -76,7 +76,6 @@ namespace /* Not applicable to ARM */ RegClass floatRegClass(FloatRegClass, FloatRegClassName, 0, debug::FloatRegs); -RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); } // anonymous namespace @@ -561,6 +560,12 @@ ISA::copyRegsFrom(ThreadContext *src) for (auto &id: vecElemClass) tc->setReg(id, src->getReg(id)); + ArmISA::MatRegContainer mc; + for (auto &id: matRegClass) { + src->getReg(id, &mc); + tc->setReg(id, &mc); + } + // setMiscReg "with effect" will set the misc register mapping correctly. // e.g. updateRegMap(val) tc->setMiscReg(MISCREG_CPSR, src->readMiscRegNoEffect(MISCREG_CPSR)); diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index 9e1afa714b..6f9478298d 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2012-2021 ARM Limited + * Copyright (c) 2010, 2012-2022 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -45,6 +45,7 @@ #include "arch/arm/mmu.hh" #include "arch/arm/pcstate.hh" #include "arch/arm/regs/int.hh" +#include "arch/arm/regs/mat.hh" #include "arch/arm/regs/misc.hh" #include "arch/arm/regs/vec.hh" #include "arch/arm/self_debug.hh" diff --git a/src/arch/arm/matrix.hh b/src/arch/arm/matrix.hh new file mode 100644 index 0000000000..ae9a8e5627 --- /dev/null +++ b/src/arch/arm/matrix.hh @@ -0,0 +1,572 @@ +/* + * Copyright (c) 2022 Arm Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file arch/arm/matrix.hh + * Matrix Register Specification. 
+ * + * In this file we add three new classes which are used to provide both + * the backing storage for matrix registers (MatStore) and for accessing + * them using a set of views onto the backing store (Tile, TileSlice). + * + * The MatStore provides the backing store for the matrix, handles the + * serialisation/unserialisation, and provides interfaces to obtain + * views of the matrix. The underlying element for the MatStore is a + * byte, and it uses two templated parameters, X and Y, to set the + * overall size of the matrix. The common use case will be that X and Y + * are the same size, yielding a square matrix, but this is not a + * requirement - it is possible to create non-square matrices too if + * such a thing is desired. + * + * The Tile provides a view on top of the MatStore which is intended to + * preserve the original aspect ratio of the underlying MatStore as the + * element size scales. It does so by row-wise interleaving one or more + * sub-matrices on top of the MatStore, where the number of sub-matrices + * is governed by the element size (in bytes) itself. As an example, if + * the elements are half-words, i.e. 2 bytes wide, then there are two + * interleaved matrices with even rows belonging to sub-matrix 0 and odd + * rows belonging to sub-matrix 1. However, each of these sub-matrices + * maintains the original aspect ratio of the MatStore - the element + * size has doubled (bytes => half words), hence each row contains half + * the original number of elements, and each sub-matrix contains half of + * the number of rows themselves. + * + * The TileSlice class provides a view of either a row or a column of a + * matrix, and can be generated from either the MatStore directly, or + * from the Tile. In the former case this allows a matrix to be viewed + * as a set of rows or columns, and in the latter this same approach is + * applied to the Tile.
In both cases this is achieved by adjusting the + * striding through the backing store accordingly. + * + * The intended usage of the views is as follows: + * + * // declare an 8x8 matrix of bytes + * using Mat8x8 = MatStore<8, 8>; + * + * // Create a matrix and make sure that it is zeroed + * Mat8x8 mat; + * mat.zero(); + * + * // Interleave four tiles of int32_t onto the 8x8 matrix, and get + * // tile 0. (Each of these tiles will be a 2x2 matrix) + * auto mat0 = mat.asTile(0); + * + * // Set both elements of row 0 to 10 + * for (auto i = 0; i < 2; ++i) { + * mat0[0][i] = 10; + * } + * + * // Sum both elements of row 1 + * int32_t sum = 0; + * auto row = mat0.asHSlice(1); + * for (auto i = 0; i < 2; ++i) { + * sum += row[i]; + * } + * + * // print column 1 of the whole MatStore when viewed as uint16_t + * auto col = mat.asVSlice(1); + * for (auto i = 0; i < 4; ++i) { + * std::cout << col[i] << std::endl; + * } + * + */ + +#ifndef __ARCH_ARM_MATRIX_HH__ +#define __ARCH_ARM_MATRIX_HH__ + +#include +#include +#include +#include +#include + +#include "base/cprintf.hh" +#include "base/logging.hh" +#include "base/types.hh" +#include "sim/serialize_handlers.hh" + +namespace gem5 +{ + +constexpr unsigned MaxMatRegRowLenInBytes = 256; +constexpr unsigned MaxMatRegRows = 256; + +// Forward declarations +template +class MatStore; +template +class Tile; + +template +struct ParseParam>; + +/** + * @brief Provides a view of a horizontal slice of either a + * MatStore or a Tile. + * + * Based on whether this view is being used from the MatStore + * directly or from the Tile different parameters are + * used. Behind the scenes the parameters are used to stride through the + * (linear) backing store in order to return or manipulate the desired + * elements of the row/column. + * + * @tparam ElemType The type of element to use for the view. + * @tparam Container The type of container being used as the backing store. + * @tparam FromTile Set true if operating on an interleaved tile.
+ */ +template +class HorizontalSlice +{ + template friend class MatStore; + template friend class Tile; + + private: + Container * container; + size_t index; + size_t xElems; + size_t yElems; + size_t startElts; + size_t strideElts; + + private: + HorizontalSlice(Container& cnt, size_t _startBytes, size_t _strideBytes, + size_t idx) + : container(&cnt), index(idx), + xElems(container->xSize() / sizeof(ElemType)), + yElems(container->ySize() / (FromTile ? sizeof(ElemType): 1)), + startElts(_startBytes / sizeof(ElemType)), + strideElts(_strideBytes / sizeof(ElemType)) + { + gem5_assert(xElems > 0, "The number of xElems cannot be 0"); + gem5_assert(yElems > 0, "The number of yElems cannot be 0"); + + // Make sure that we have a whole multiple of an element size + assert (_startBytes % sizeof(ElemType) == 0); + assert (_strideBytes % sizeof(ElemType) == 0); + + if constexpr (!FromTile) { + // If we are not operating on a tile, the stride must be the + // same as the row length, X. + assert(_strideBytes == container->xSize()); + } else { + // If we are operating on a tile, then the stride must be + // sizeof(ElemSize) greater than X. + assert(_strideBytes / container->xSize() == sizeof(ElemType)); + } + }; + + public: + ElemType& + operator[](size_t elem_idx) + { + assert(elem_idx < xElems); + size_t linear_index = startElts + index * strideElts + elem_idx; + return container->template rawPtr()[linear_index]; + }; + + void + zero() + { + for (int i = 0; i < xElems; ++i) { + (*this)[i] = (ElemType)0; + } + }; +}; + +/** + * @brief Provides a view of a vertical slice of either a + * MatStore or a Tile. + * + * Based on whether this view it is being used from the MatStore + * directly or from the Tile different parameters are used. Behind the + * scenes the parameters are used to stride through the (linear) backing + * store in order to return or maniplate the desired elements of the + * row/column. + * + * @tparam ElemType The type of element to use for the view. 
+ * @tparam Container The type of container being used as the backing store. + * @tparam FromTile Set true if operating on an interleaved tile. + */ +template +class VerticalSlice +{ + template friend class MatStore; + template friend class Tile; + + private: + Container * container; + size_t index; + size_t xElems; + size_t yElems; + size_t startElts; + size_t strideElts; + + private: + VerticalSlice(Container& cnt, size_t _startBytes, size_t _strideBytes, size_t idx) + : container(&cnt), index(idx), + xElems(container->xSize() / sizeof(ElemType)), + yElems(container->ySize() / (FromTile ? sizeof(ElemType): 1)), + startElts(_startBytes / sizeof(ElemType)), + strideElts(_strideBytes / sizeof(ElemType)) + { + gem5_assert(xElems > 0, "The number of xElems cannot be 0"); + gem5_assert(yElems > 0, "The number of yElems cannot be 0"); + + // Make sure that we have a whole multiple of an element size + assert (_startBytes % sizeof(ElemType) == 0); + assert (_strideBytes % sizeof(ElemType) == 0); + + if constexpr (!FromTile) { + // If we are not operating on a tile, the stride must be the + // same as the row length, X. + assert(_strideBytes == container->xSize()); + } else { + // If we are operating on a tile, then the stride must be + // sizeof(ElemSize) greater than X. + assert(_strideBytes / container->xSize() == sizeof(ElemType)); + } + }; + + public: + ElemType& + operator[](size_t elem_idx) + { + assert(elem_idx < yElems); + size_t linear_index = startElts + elem_idx * strideElts + index; + return container->template rawPtr()[linear_index]; + }; + + void + zero() + { + for (int i = 0; i < yElems; ++i) { + (*this)[i] = (ElemType)0; + } + }; +}; + +/** + * @brief Provides a view of a matrix that is row-interleaved onto a + * MatStore. + * + * This class largely acts as a shim between the MatStore and the + * TileSlice view. 
The size of the ElemType and the index passed to the + * constructor are used to calculate the stride and start which are + * passed to the TileSlice view to control how it strides through the + * backing store. + * + * @tparam ElemType The type of element to use for the view. + * @tparam Container The type of container being used as the backing store. + */ +template +class Tile +{ + template friend class MatStore; + + // We "calculate" the number of possible tiles based on the element size + static constexpr size_t NUM_TILES = sizeof(ElemType); + + private: + Container * container; + size_t index; + size_t startBytes; + size_t strideBytes; + + private: + Tile(Container& cnt, size_t idx) + : container(&cnt), index(idx) + { + assert(index < NUM_TILES); + startBytes = container->xSize() * index; + strideBytes = NUM_TILES * container->xSize(); + }; + + public: + auto + operator[](size_t idx) + { + assert(idx < (container->ySize() / NUM_TILES)); + return asHSlice(idx); + }; + + Container* + getContainer() + { + return container; + }; + + auto + asHSlice(size_t row_idx) + { + assert(row_idx < container->ySize() / NUM_TILES); + return HorizontalSlice(*container, + startBytes, + strideBytes, + row_idx); + }; + + auto + asVSlice(size_t col_idx) + { + assert(col_idx < container->xSize()); + return VerticalSlice(*container, startBytes, + strideBytes, col_idx); + }; + + void + zero() + { + for (int i = 0; i < container->ySize() / NUM_TILES; ++i) { + // We zero the tile by rows. We need to do it this way due + // to the interleaving. + auto row = this->asHSlice(i); + row.zero(); + } + }; +}; + +// Base container class for a matrix. Allows for non-square matricies. +/** + * @brief Backing store for matrices. + * + * This class provides the backing store for matricies, and is largely a + * wrapper around an std::array of bytes. 
This class provides some basic + * interfaces for assignment (copy the backing store) and comparison, + * and provides the interface for generating views onto the backing + * store. It is these views that are intended to be used by the end-user + * of the matrix in most cases. + * + * This class is also responsible for handling the + * serialisation/unserialisation of matrix registers (see ShowParam and + * ParseParam). + * + * @tparam X X size in bytes (number of columns). + * @tparam Y Y size in bytes (number of rows). + */ +template +class MatStore +{ + static_assert(X > 0, "X size cannot be 0"); + static_assert(Y > 0, "Y size cannot be 0"); + + static constexpr size_t LINEAR_SIZE = X * Y; + + template friend class HorizontalSlice; + template friend class VerticalSlice; + + public: + static constexpr inline size_t xSize() { return X; }; + static constexpr inline size_t ySize() { return Y; }; + static constexpr inline size_t linearSize() { return LINEAR_SIZE; }; + + using Container = std::array; + using MyClass = MatStore; + private: + // We need to be able to handle 128-bit types; align accordingly + alignas(16) Container container; + + public: + /** Constructor */ + MatStore() {}; + + MatStore(const MatStore&) = default; + + void + zero() + { + memset(container.data(), 0 , LINEAR_SIZE); + } + + /** Assignment operators. */ + /** @{ */ + /** From MatStore */ + MyClass& + operator=(const MyClass& that) + { + if (&that == this) + return *this; + memcpy(container.data(), that.container.data(), LINEAR_SIZE); + return *this; + } + /** @} */ + + /** Equality operator. + * Required to compare thread contexts. + */ + template + inline bool + operator==(const MatStore& that) const + { + return X == X2 && Y == Y2 && + !memcmp(container.data(), that.container.data(), LINEAR_SIZE); + } + + /** Inequality operator. + * Required to compare thread contexts. 
+ */ + template + bool + operator!=(const MatStore& that) const + { + return !operator==(that); + } + + private: + /** Get pointer to the raw data. */ + template + const ElemType* rawPtr() const + { + return reinterpret_cast(container.data()); + } + + template + ElemType* rawPtr() { return reinterpret_cast(container.data()); } + + public: + template + auto + asTile(size_t index) + { + return Tile(*this, index); + } + + template + auto + asHSlice(size_t row_idx) + { + return HorizontalSlice(*this, 0, X, row_idx); + } + + template + auto + asVSlice(size_t col_idx) + { + return VerticalSlice(*this, 0, X, col_idx); + } + + friend std::ostream& + operator<<(std::ostream& os, const MatStore& v) + { + // When printing for human consumption, break into 4 byte chunks. + ccprintf(os, "["); + size_t count = 0; + for (auto& b: v.container) { + if (count && (count % 4) == 0) + os << "_"; + ccprintf(os, "%02x", b); + count++; + } + ccprintf(os, "]"); + return os; + } + + /** @} */ + /** + * Used for serialization/unserialisation. + */ + friend ParseParam>; + friend ShowParam>; + +}; + +/** + * Calls required for serialization/deserialization + */ +/** @{ */ +template +struct ParseParam> +{ + static bool + parse(const std::string &str, MatStore &value) + { + fatal_if(str.size() > 2 * X * Y, + "Matrix register value overflow at unserialize"); + fatal_if(str.size() < 2 * X * Y, + "Matrix register value underflow at unserialize"); + + for (int i = 0; i < X * Y; i++) { + uint8_t b = 0; + if (2 * i < str.size()) + b = stoul(str.substr(i * 2, 2), nullptr, 16); + value.template rawPtr()[i] = b; + } + return true; + } +}; + +template +struct ShowParam> +{ + static void + show(std::ostream &os, const MatStore &value) + { + for (auto& b: value.container) + ccprintf(os, "%02x", b); + } +}; +/** @} */ + +/** + * Dummy type aliases and constants for architectures that do not + * implement matrix registers. 
+ */ +/** @{ */ +struct DummyMatRegContainer +{ + RegVal filler = 0; + bool operator == (const DummyMatRegContainer &d) const { return true; } + bool operator != (const DummyMatRegContainer &d) const { return false; } +}; +template <> +struct ParseParam +{ + static bool + parse(const std::string &s, DummyMatRegContainer &value) + { + return false; + } +}; +static_assert(sizeof(DummyMatRegContainer) == sizeof(RegVal)); +static inline std::ostream & +operator<<(std::ostream &os, const DummyMatRegContainer &d) +{ + return os; +} +/** @} */ + +} // namespace gem5 + +#endif // __ARCH_ARM_MATRIX_HH__ diff --git a/src/arch/arm/matrix.test.cc b/src/arch/arm/matrix.test.cc new file mode 100644 index 0000000000..16d3da7048 --- /dev/null +++ b/src/arch/arm/matrix.test.cc @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2022 Arm Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "arch/arm/matrix.hh" + +using namespace gem5; + +TEST(Matrix, Size) +{ + { + // Minimum size + MatStore<1, 1> mat; + ASSERT_EQ(1, mat.linearSize()); + } + + { + // Medium size + constexpr size_t x_size = MaxMatRegRowLenInBytes / 2; + constexpr size_t y_size = MaxMatRegRows / 2; + MatStore mat; + ASSERT_EQ(x_size * y_size, mat.linearSize()); + } + + { + // Maximum size + MatStore mat; + ASSERT_EQ(MaxMatRegRowLenInBytes * MaxMatRegRows, mat.linearSize()); + } +} + +TEST(Matrix, Zero) +{ + constexpr size_t size = 16; + MatStore mat; + auto tile = mat.asTile(0); + + // Initializing with non-zero value + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + tile[i][j] = 0xAA; + } + } + + // zeroing the matrix + mat.zero(); + + // checking if every matrix element is set to zero + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + ASSERT_EQ(tile[i][j], 0); + } + } +} + +TEST(Matrix, ZeroTiles) +{ + constexpr size_t size = 16; + MatStore mat; + auto byte_tile = mat.asTile(0); + + // Initializing the whole tile with non-zero value + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + byte_tile[i][j] = 0xAA; + } + } + + // zeroing the half-word tile 0 of matrix + auto half_word_tile = mat.asTile(0); + half_word_tile.zero(); + + // Check that every element of half-word tile 0 is zero + for (auto i = 0; i < size / 2; i++) { + for (auto j = 0; j < size / 2; j++) { + ASSERT_EQ(half_word_tile[i][j], 0); + } + } + + // Check that every element of half-word tile 1 is 0xAAAA (note the + // double width of the element) + half_word_tile = mat.asTile(1); + for (auto i = 0; i < size / 2; i++) { + for (auto j = 0; j < size / 2; j++) { + ASSERT_EQ(half_word_tile[i][j], 0xAAAA); + } + } + + // Check if every matrix element on an even row is set to zero + for (auto i = 0; i < size; i += 2) { + for (auto j = 0; j < size; j++) { + ASSERT_EQ(byte_tile[i][j], 0); + } + } + + // Check if every matrix element 
on an odd row is set to 0xAA + for (auto i = 1; i < size; i += 2) { + for (auto j = 0; j < size; j++) { + ASSERT_EQ(byte_tile[i][j], 0xAA); + } + } +} + +TEST(Matrix, ZeroTileHSlice) +{ + constexpr size_t size = 16; + MatStore mat; + auto byte_tile = mat.asTile(0); + + // Initializing the whole tile with non-zero value + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + byte_tile[i][j] = 0xAA; + } + } + + // zeroing the 0th row of half-word tile 0 + auto half_word_tile = mat.asTile(0); + auto row = half_word_tile.asHSlice(0); + row.zero(); + + // Check that every element of the row is zero + for (auto i = 0; i < size / 2; i++) { + ASSERT_EQ(row[i], 0); + } + + // Check that every element of row 1 is 0xAAAA + row = half_word_tile.asHSlice(1); + for (auto i = 0; i < size / 2; i++) { + ASSERT_EQ(row[i], 0xAAAA); + } + + // Check that row 0 of the byte tile is zero, and that all remaining + // rows are unaffected + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + if (i == 0) { + ASSERT_EQ(byte_tile[i][j], 0); + } else { + ASSERT_EQ(byte_tile[i][j], 0xAA); + } + } + } +} + +TEST(Matrix, ZeroTileVSlice) +{ + constexpr size_t size = 16; + MatStore mat; + auto byte_tile = mat.asTile(0); + + // Initializing the whole tile with non-zero value + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + byte_tile[i][j] = 0xAA; + } + } + + // zeroing the 0th column of half-word tile 0 + auto half_word_tile = mat.asTile(0); + auto col = half_word_tile.asVSlice(0); + col.zero(); + + // Check that every element of the column is zero + for (auto i = 0; i < size / 2; i++) { + ASSERT_EQ(col[i], 0); + } + + // Check that every element of column 1 is 0xAAAA + col = half_word_tile.asVSlice(1); + for (auto i = 0; i < size / 2; i++) { + ASSERT_EQ(col[i], 0xAAAA); + } + + // Check that elements 0 & 1 of the byte tile are zero for even rows, + // and that all remaining elements are unaffected + for (auto i = 0; i < size; i++) { + 
for (auto j = 0; j < size; j++) { + if (i % 2 == 0 && (j == 0 || j == 1)) { + ASSERT_EQ(byte_tile[i][j], 0); + } else { + ASSERT_EQ(byte_tile[i][j], 0xAA); + } + } + } +} + +TEST(Matrix, ZeroHSlice) +{ + constexpr size_t size = 16; + MatStore mat; + auto byte_tile = mat.asTile(0); + + // Initializing the whole tile with non-zero value + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + byte_tile[i][j] = 0xAA; + } + } + + // Now we get a row directly from the matrix (as words, because it + // should make no difference), zero it + auto row = mat.asHSlice(4); + row.zero(); + + // Check that every element of the row is zero + for (auto i = 0; i < size / 4; i++) { + ASSERT_EQ(row[i], 0); + } + + // Check that row 4 of the byte tile is zero, and that all remaining + // rows are unaffected + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + if (i == 4) { + ASSERT_EQ(byte_tile[i][j], 0); + } else { + ASSERT_EQ(byte_tile[i][j], 0xAA); + } + } + } +} + +TEST(Matrix, ZeroVSlice) +{ + constexpr size_t size = 16; + MatStore mat; + auto byte_tile = mat.asTile(0); + + // Initializing the whole tile with non-zero value + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + byte_tile[i][j] = 0xAA; + } + } + + // Now we get a column directly from the matrix, zero it + auto col = mat.asVSlice(4); + col.zero(); + + // Check that every element of the column is zero + for (auto i = 0; i < size; i++) { + ASSERT_EQ(col[i], 0); + } + + // Check that col 4 of the byte tile is zero, and that all remaining + // rows are unaffected + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + if (j == 4) { + ASSERT_EQ(byte_tile[i][j], 0); + } else { + ASSERT_EQ(byte_tile[i][j], 0xAA); + } + } + } + + // Now we repeat with a wider element type too. 
Reinitializing the + // whole tile with non-zero value + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + byte_tile[i][j] = 0xAA; + } + } + + // Now we get a word-wide column directly from the matrix, zero it + auto wide_col = mat.asVSlice(1); + wide_col.zero(); + + // Check that every element of the column is zero + for (auto i = 0; i < size; i++) { + ASSERT_EQ(wide_col[i], 0); + } + + // Check that cols 4-7 of the byte tile are zero, and that all + // remaining rows are unaffected + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + if (j >= 4 && j <= 7) { + ASSERT_EQ(byte_tile[i][j], 0); + } else { + ASSERT_EQ(byte_tile[i][j], 0xAA); + } + } + } +} + +class TwoDifferentMatRegs : public testing::Test +{ + protected: + static constexpr size_t size = 4; + + MatStore mat1; + MatStore mat2; + + void + SetUp() override + { + auto tile1 = mat1.asTile(0); + auto tile2 = mat2.asTile(0); + + // Initializing with non-zero value for matrix 1 + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + tile1[i][j] = 0xAA; + } + } + + // Initializing with zero value for matrix 2 + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + tile2[i][j] = 0x0; + } + } + } +}; + +// Testing operator= +TEST_F(TwoDifferentMatRegs, Assignment) +{ + // Copying the matrix + mat2 = mat1; + + auto tile2 = mat2.asTile(0); + + // Checking if matrix 2 elements are 0xAA + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + ASSERT_EQ(tile2[i][j], 0xAA); + } + } +} + +// Testing operator== +TEST_F(TwoDifferentMatRegs, Equality) +{ + // Equality check + ASSERT_TRUE(mat1 == mat1); + ASSERT_TRUE(mat2 == mat2); + ASSERT_FALSE(mat1 == mat2); +} + +// Testing operator!= +TEST_F(TwoDifferentMatRegs, Inequality) +{ + // Inequality check + ASSERT_FALSE(mat1 != mat1); + ASSERT_FALSE(mat2 != mat2); + ASSERT_TRUE(mat1 != mat2); +} + +// Testing operator<< +TEST_F(TwoDifferentMatRegs, Printing) +{ + { + 
std::ostringstream stream; + stream << mat1; + ASSERT_EQ(stream.str(), "[aaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaaa]"); + } + + { + std::ostringstream stream; + stream << mat2; + ASSERT_EQ(stream.str(), "[00000000_00000000_00000000_00000000]"); + } +} + +// Testing ParseParam +TEST_F(TwoDifferentMatRegs, ParseParam) +{ + ParseParam parser; + + parser.parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", mat1); + parser.parse("cccccccccccccccccccccccccccccccc", mat2); + + for (auto i = 0; i < size; i++) { + for (auto j = 0; j < size; j++) { + ASSERT_EQ(mat1.asTile(0)[i][j], 0xbb); + ASSERT_EQ(mat2.asTile(0)[i][j], 0xcc); + } + } +} + +// Testing ParseParam Underflow +TEST_F(TwoDifferentMatRegs, ParseParamUnderflow) +{ + ParseParam parser; + + // We should trigger a fatal() here. + EXPECT_ANY_THROW(parser.parse("b", mat1)); +} + +// Testing ParseParam Overflow +TEST_F(TwoDifferentMatRegs, ParseParamOverflow) +{ + ParseParam parser; + + // We should trigger a fatal() here. + EXPECT_ANY_THROW(parser.parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", mat1)); +} + +// Testing ShowParam +TEST_F(TwoDifferentMatRegs, ShowParam) +{ + ShowParam parser; + + { + std::stringstream ss; + parser.show(ss, mat1); + ASSERT_EQ(ss.str(), "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); + } + + { + std::stringstream ss; + parser.show(ss, mat2); + ASSERT_EQ(ss.str(), "00000000000000000000000000000000"); + } +} diff --git a/src/arch/arm/regs/mat.hh b/src/arch/arm/regs/mat.hh new file mode 100644 index 0000000000..196e876f92 --- /dev/null +++ b/src/arch/arm/regs/mat.hh @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2022 Arm Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __ARCH_ARM_REGS_MAT_HH__ +#define __ARCH_ARM_REGS_MAT_HH__ + +#include "arch/arm/types.hh" +#include "arch/arm/matrix.hh" +#include "cpu/reg_class.hh" +#include "debug/MatRegs.hh" + +namespace gem5 +{ + +namespace ArmISA +{ + +/* + * We do the same as is done for vector registers when creating the + * matrices. One of the things to note is that this allocates the + * largest architecturally possible matrix - this is a bit inefficient + * from a memory point of view, but at this point we do not know which + * vector length will be chosen (and this can potentially vary during + * runtime). + */ +using MatRegContainer = gem5::MatStore; + +template +using MatTile = gem5::Tile; + +template +using MatTileRow = gem5::HorizontalSlice; + +template +using MatTileCol = gem5::VerticalSlice; + +template +using MatRow = gem5::HorizontalSlice; + +template +using MatCol = gem5::VerticalSlice; + +// SME ZA tile, i.e. matrix +const int NumMatrixRegs = 1; + +static inline TypedRegClassOps matRegClassOps; + +inline constexpr RegClass matRegClass = + RegClass(MatRegClass, MatRegClassName, NumMatrixRegs, debug::MatRegs). + ops(matRegClassOps). + regType(); + +/* + * Helpers for providing access to the different views of a matrix + * register. Intended to be called from the instruction implementations + * themselves. 
+ */ +template +MatTile +getTile(MatRegContainer &reg, uint8_t tile_idx) +{ + return reg.asTile(tile_idx); +} +template +MatTileRow +getTileHSlice(MatRegContainer &reg, uint8_t tile_idx, uint8_t row_idx) +{ + return reg.asTile(tile_idx).asHSlice(row_idx); +} +template +MatTileCol +getTileVSlice(MatRegContainer &reg, uint8_t tile_idx, uint8_t col_idx) +{ + return reg.asTile(tile_idx).asVSlice(col_idx); +} +template +MatRow +getHSlice(MatRegContainer &reg, uint8_t row_idx) +{ + return reg.asHSlice(row_idx); +} +template +MatCol +getVSlice(MatRegContainer &reg, uint8_t col_idx) +{ + return reg.asVSlice(col_idx); +} + +} // namespace ArmISA +} // namespace gem5 + +#endif diff --git a/src/arch/arm/types.hh b/src/arch/arm/types.hh index 44b20476d2..50db9bc9af 100644 --- a/src/arch/arm/types.hh +++ b/src/arch/arm/types.hh @@ -472,6 +472,18 @@ namespace ArmISA constexpr unsigned VecRegSizeBytes = MaxSveVecLenInBytes; constexpr unsigned VecPredRegSizeBits = MaxSveVecLenInBytes; + constexpr unsigned MaxSmeVecLenInBits = 2048; + static_assert(MaxSmeVecLenInBits >= 128 && + MaxSmeVecLenInBits <= 2048 && + // Only powers of two are supported. We don't need to + // check for the zero case here as we already know it + // is over 128. + (MaxSmeVecLenInBits & (MaxSmeVecLenInBits - 1)) == 0, + "Unsupported max. 
SME vector length"); + constexpr unsigned MaxSmeVecLenInBytes = MaxSmeVecLenInBits >> 3; + constexpr unsigned MaxSmeVecLenInWords = MaxSmeVecLenInBits >> 5; + constexpr unsigned MaxSmeVecLenInDWords = MaxSmeVecLenInBits >> 6; + } // namespace ArmISA } // namespace gem5 From dfd151d52d10c85cc0503fa60d3169b3d32456c1 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 3 Aug 2022 13:40:02 +0100 Subject: [PATCH 146/492] arch-arm: Add system registers added/used by SME We add the following registers which are added by SME: * ID_AA64SMFR0_EL1 * SVCR * SMIDR_EL1 * SMPRI_EL1 * SMPRIMAP_EL2 * SMCR_EL3 * SMCR_EL2 * SMCR_EL12 * SMCR_EL1 * TPIDR2_EL0 * MPAMSM_EL1 In addition we extend some of the existing registers with SME support (SCR_EL3, CPACR_EL1, CPTR_EL2, CPTR_EL3, etc). These registers are responsible for enabling SME itself, or for configuring the trapping behaviour for the different ELs. In addition we implement some dummy registers as they are officially required by SME, but gem5 itself doesn't actually support the features yet (FGT, HCX). 
Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289 Change-Id: I18ba65fb9ac2b7a4b4f361998564fb5d472d1789 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64335 Tested-by: kokoro Reviewed-by: Giacomo Travaglini Maintainer: Giacomo Travaglini --- src/arch/arm/ArmISA.py | 13 +++-- src/arch/arm/ArmSystem.py | 26 +++++++++ src/arch/arm/isa.cc | 97 ++++++++++++++++++++++++++++++++- src/arch/arm/isa.hh | 3 + src/arch/arm/regs/misc.cc | 52 +++++++++++++++++- src/arch/arm/regs/misc.hh | 35 ++++++++++++ src/arch/arm/regs/misc_types.hh | 35 +++++++++++- src/arch/arm/system.cc | 1 + src/arch/arm/system.hh | 8 ++- 9 files changed, 261 insertions(+), 9 deletions(-) diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py index 4f336e04cd..e73046d08b 100644 --- a/src/arch/arm/ArmISA.py +++ b/src/arch/arm/ArmISA.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012-2013, 2015-2021 ARM Limited +# Copyright (c) 2012-2013, 2015-2022 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -38,7 +38,7 @@ from m5.proxy import * from m5.SimObject import SimObject from m5.objects.ArmPMU import ArmPMU -from m5.objects.ArmSystem import SveVectorLength, ArmRelease +from m5.objects.ArmSystem import SveVectorLength, SmeVectorLength, ArmRelease from m5.objects.BaseISA import BaseISA # Enum for DecoderFlavor @@ -58,6 +58,8 @@ class ArmDefaultSERelease(ArmRelease): "FEAT_FCMA", "FEAT_JSCVT", "FEAT_PAuth", + # Armv9.2 + "FEAT_SME", # Other "TME", ] @@ -160,11 +162,14 @@ class ArmISA(BaseISA): "Any access to a MISCREG_IMPDEF_UNIMPL register is executed as NOP", ) - # This is required because in SE mode a generic System SimObject is - # allocated, instead of an ArmSystem + # These are required because in SE mode a generic System SimObject + # is allocated, instead of an ArmSystem sve_vl_se = Param.SveVectorLength( 1, "SVE vector length in quadwords (128-bit), SE-mode only" ) + sme_vl_se = Param.SmeVectorLength( + 1, "SME vector 
length in quadwords (128-bit), SE-mode only" + ) # Recurse into subnodes to generate DTB entries. This is mainly needed to # generate the PMU entry. diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py index 936c032780..25cf8b2172 100644 --- a/src/arch/arm/ArmSystem.py +++ b/src/arch/arm/ArmSystem.py @@ -49,6 +49,21 @@ class SveVectorLength(UInt8): max = 16 +class SmeVectorLength(UInt8): + min = 1 + max = 16 + + def _check(self): + super()._check() + + # SME needs to be a whole power of 2. We already know value is + # not zero. Hence: + if self.value & (self.value - 1) != 0: + raise TypeError( + "SME vector length is not a power of 2: %d" % self.value + ) + + class ArmExtension(ScopedEnum): vals = [ # Armv8.1 @@ -69,6 +84,8 @@ class ArmExtension(ScopedEnum): "FEAT_PAuth", # Armv8.4 "FEAT_SEL2", + # Armv9.2 + "FEAT_SME", # Optional in Armv9.2 # Others "SECURITY", "LPAE", @@ -145,6 +162,8 @@ class ArmDefaultRelease(Armv8): "FEAT_PAuth", # Armv8.4 "FEAT_SEL2", + # Armv9.2 + "FEAT_SME", ] @@ -176,6 +195,10 @@ class Armv84(Armv83): extensions = Armv83.extensions + ["FEAT_SEL2"] +class Armv92(Armv84): + extensions = Armv84.extensions + ["FEAT_SME"] + + class ArmSystem(System): type = "ArmSystem" cxx_header = "arch/arm/system.hh" @@ -205,6 +228,9 @@ class ArmSystem(System): sve_vl = Param.SveVectorLength( 1, "SVE vector length in quadwords (128-bit)" ) + sme_vl = Param.SmeVectorLength( + 1, "SME vector length in quadwords (128-bit)" + ) semihosting = Param.ArmSemihosting( NULL, "Enable support for the Arm semihosting by settings this parameter", diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index c6bb2bd8d1..78a1f4fc9f 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -110,6 +110,7 @@ ISA::ISA(const Params &p) : BaseISA(p), system(NULL), haveLargeAsid64 = system->haveLargeAsid64(); physAddrRange = system->physAddrRange(); sveVL = system->sveVL(); + smeVL = system->smeVL(); release = system->releaseFS(); } else { @@ -117,6 +118,7 @@ 
ISA::ISA(const Params &p) : BaseISA(p), system(NULL), haveLargeAsid64 = false; physAddrRange = 32; // dummy value sveVL = p.sve_vl_se; + smeVL = p.sme_vl_se; release = p.release_se; } @@ -406,6 +408,49 @@ ISA::initID64(const ArmISAParams &p) miscRegs[MISCREG_ZCR_EL1] = sveVL - 1; } + // SME + + // Set up the SME SMIDR + // [63:32] RES0 + // [31:24] Implementer - default this to Arm Limited + // [23:16] SMCU Revision - set to 0 as we don't model an SMCU + // [15] SMPS - We don't do priorities in gem5, so disable + // [14:12] RES0 + // [11:0] Affinity - we implement per-CPU SME, so set to 0 (no SMCU) + miscRegs[MISCREG_SMIDR_EL1] = 0 | // Affinity + 0 << 15 | // SMPS + 0x41 << 24; // Implementer + + miscRegs[MISCREG_ID_AA64SMFR0_EL1] = 0; + miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 32; // F32F32 + // The following BF16F32 is actually not implemented due to a lack + // of BF16 support in gem5's fplib. However, as per the SME spec the + // _only_ allowed value is 0x1. + miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 34; // BF16F32 + miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 35; // F16F32 + miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0xFUL << 36; // I8I32 + miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 48; // F64F64 + miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0xFUL << 52; // I16I64 + miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x0UL << 56; // SMEver + miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 63; // FA64 + + // We want to support FEAT_SME_FA64. Therefore, we enable it in all + // SMCR_ELx registers by default. Runtime software might change this + // later, but given that gem5 doesn't disable instructions based on + // this flag we default to the most representative value. 
+ miscRegs[MISCREG_SMCR_EL3] = 0x1UL << 31; + miscRegs[MISCREG_SMCR_EL2] = 0x1UL << 31; + miscRegs[MISCREG_SMCR_EL1] = 0x1UL << 31; + + // Set the default vector length + if (release->has(ArmExtension::SECURITY)) { + miscRegs[MISCREG_SMCR_EL3] |= ((smeVL - 1) & 0xF); + } else if (release->has(ArmExtension::VIRTUALIZATION)) { + miscRegs[MISCREG_SMCR_EL2] |= ((smeVL - 1) & 0xF); + } else { + miscRegs[MISCREG_SMCR_EL1] |= ((smeVL - 1) & 0xF); + } + // Enforce consistency with system-level settings... // EL3 @@ -420,6 +465,10 @@ ISA::initID64(const ArmISAParams &p) miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits( miscRegs[MISCREG_ID_AA64PFR0_EL1], 35, 32, release->has(ArmExtension::FEAT_SVE) ? 0x1 : 0x0); + // SME + miscRegs[MISCREG_ID_AA64PFR1_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64PFR1_EL1], 27, 24, + release->has(ArmExtension::FEAT_SME) ? 0x1 : 0x0); // SecEL2 miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits( miscRegs[MISCREG_ID_AA64PFR0_EL1], 39, 36, @@ -962,6 +1011,10 @@ ISA::readMiscReg(RegIndex idx) { return miscRegs[MISCREG_CPSR] & 0x800000; } + case MISCREG_SVCR: + { + return miscRegs[MISCREG_SVCR]; + } case MISCREG_L2CTLR: { // mostly unimplemented, just set NumCPUs field from sim and return @@ -1037,7 +1090,9 @@ ISA::readMiscReg(RegIndex idx) 0x0000001000000000 : 0) | // SecEL2 (gicv3CpuInterface ? 0x0000000001000000 : 0); case MISCREG_ID_AA64PFR1_EL1: - return 0; // bits [63:0] RES0 (reserved for future use) + return 0x0 | + (release->has(ArmExtension::FEAT_SME) ? + 0x1 << 24 : 0); // SME // Generic Timer registers case MISCREG_CNTFRQ ... 
MISCREG_CNTVOFF: @@ -1188,6 +1243,9 @@ ISA::setMiscReg(RegIndex idx, RegVal val) if (release->has(ArmExtension::FEAT_SVE)) { cpacrMask.zen = ones; } + if (release->has(ArmExtension::FEAT_SME)) { + cpacrMask.smen = ones; + } newVal &= cpacrMask; DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", miscRegName[idx], newVal); @@ -1205,14 +1263,21 @@ ISA::setMiscReg(RegIndex idx, RegVal val) cptrMask.tz = ones; cptrMask.zen = hcr.e2h ? ones : 0; } + if (release->has(ArmExtension::FEAT_SME)) { + cptrMask.tsm = ones; + cptrMask.smen = hcr.e2h ? ones : 0; + } cptrMask.fpen = hcr.e2h ? ones : 0; newVal &= cptrMask; cptrMask = 0; - cptrMask.res1_13_12_el2 = ones; + cptrMask.res1_13_el2 = ones; cptrMask.res1_7_0_el2 = ones; if (!release->has(ArmExtension::FEAT_SVE)) { cptrMask.res1_8_el2 = ones; } + if (!release->has(ArmExtension::FEAT_SME)) { + cptrMask.res1_12_el2 = ones; + } cptrMask.res1_9_el2 = ones; newVal |= cptrMask; DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", @@ -1229,6 +1294,9 @@ ISA::setMiscReg(RegIndex idx, RegVal val) if (release->has(ArmExtension::FEAT_SVE)) { cptrMask.ez = ones; } + if (release->has(ArmExtension::FEAT_SME)) { + cptrMask.esm = ones; + } newVal &= cptrMask; DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", miscRegName[idx], newVal); @@ -1917,6 +1985,21 @@ ISA::setMiscReg(RegIndex idx, RegVal val) idx = MISCREG_CPSR; } break; + case MISCREG_SVCR: + { + SVCR svcr = miscRegs[MISCREG_SVCR]; + SVCR newSvcr = newVal; + + // Don't allow other bits to be set + svcr.sm = newSvcr.sm; + svcr.za = newSvcr.za; + newVal = svcr; + } + break; + case MISCREG_SMPRI_EL1: + // Only the bottom 4 bits are settable + newVal = newVal & 0xF; + break; case MISCREG_AT_S1E1R_Xt: addressTranslation64(MMU::S1E1Tran, BaseMMU::Read, 0, val); return; @@ -1982,6 +2065,16 @@ ISA::setMiscReg(RegIndex idx, RegVal val) tc->getDecoderPtr()->as().setSveLen( (getCurSveVecLenInBits() >> 7) - 1); return; + case MISCREG_SMCR_EL3: + case MISCREG_SMCR_EL2: + case MISCREG_SMCR_EL1: + // Set the 
value here as we need to update the regs before + // reading them back in getCurSmeVecLenInBits (not + // implemented yet) to avoid setting stale vector lengths in + // the decoder. + setMiscRegNoEffect(idx, newVal); + // TODO: set the SME vector length + return; } setMiscRegNoEffect(idx, newVal); } diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index 6f9478298d..bc0ab7683e 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -96,6 +96,9 @@ namespace ArmISA /** SVE vector length in quadwords */ unsigned sveVL; + /** SME vector length in quadwords */ + unsigned smeVL; + /** This could be either a FS or a SE release */ const ArmRelease *release; diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc index 142b25f8c4..382b63e8bd 100644 --- a/src/arch/arm/regs/misc.cc +++ b/src/arch/arm/regs/misc.cc @@ -912,7 +912,7 @@ std::unordered_map miscRegNumToIdx{ { MiscRegNum64(3, 0, 0, 4, 2), MISCREG_RAZ }, { MiscRegNum64(3, 0, 0, 4, 3), MISCREG_RAZ }, { MiscRegNum64(3, 0, 0, 4, 4), MISCREG_ID_AA64ZFR0_EL1 }, - { MiscRegNum64(3, 0, 0, 4, 5), MISCREG_RAZ }, + { MiscRegNum64(3, 0, 0, 4, 5), MISCREG_ID_AA64SMFR0_EL1 }, { MiscRegNum64(3, 0, 0, 4, 6), MISCREG_RAZ }, { MiscRegNum64(3, 0, 0, 4, 7), MISCREG_RAZ }, { MiscRegNum64(3, 0, 0, 5, 0), MISCREG_ID_AA64DFR0_EL1 }, @@ -943,6 +943,8 @@ std::unordered_map miscRegNumToIdx{ { MiscRegNum64(3, 0, 1, 0, 1), MISCREG_ACTLR_EL1 }, { MiscRegNum64(3, 0, 1, 0, 2), MISCREG_CPACR_EL1 }, { MiscRegNum64(3, 0, 1, 2, 0), MISCREG_ZCR_EL1 }, + { MiscRegNum64(3, 0, 1, 2, 4), MISCREG_SMPRI_EL1 }, + { MiscRegNum64(3, 0, 1, 2, 6), MISCREG_SMCR_EL1 }, { MiscRegNum64(3, 0, 2, 0, 0), MISCREG_TTBR0_EL1 }, { MiscRegNum64(3, 0, 2, 0, 1), MISCREG_TTBR1_EL1 }, { MiscRegNum64(3, 0, 2, 0, 2), MISCREG_TCR_EL1 }, @@ -981,6 +983,7 @@ std::unordered_map miscRegNumToIdx{ { MiscRegNum64(3, 0, 9, 14, 2), MISCREG_PMINTENCLR_EL1 }, { MiscRegNum64(3, 0, 10, 2, 0), MISCREG_MAIR_EL1 }, { MiscRegNum64(3, 0, 10, 3, 0), MISCREG_AMAIR_EL1 }, + { 
MiscRegNum64(3, 0, 10, 5, 3), MISCREG_MPAMSM_EL1 }, { MiscRegNum64(3, 0, 12, 0, 0), MISCREG_VBAR_EL1 }, { MiscRegNum64(3, 0, 12, 0, 1), MISCREG_RVBAR_EL1 }, { MiscRegNum64(3, 0, 12, 1, 0), MISCREG_ISR_EL1 }, @@ -1024,6 +1027,7 @@ std::unordered_map miscRegNumToIdx{ { MiscRegNum64(3, 0, 15, 1, 4), MISCREG_DL1DATA4_EL1 }, { MiscRegNum64(3, 1, 0, 0, 0), MISCREG_CCSIDR_EL1 }, { MiscRegNum64(3, 1, 0, 0, 1), MISCREG_CLIDR_EL1 }, + { MiscRegNum64(3, 1, 0, 0, 6), MISCREG_SMIDR_EL1 }, { MiscRegNum64(3, 1, 0, 0, 7), MISCREG_AIDR_EL1 }, { MiscRegNum64(3, 1, 11, 0, 2), MISCREG_L2CTLR_EL1 }, { MiscRegNum64(3, 1, 11, 0, 3), MISCREG_L2ECTLR_EL1 }, @@ -1038,6 +1042,7 @@ std::unordered_map miscRegNumToIdx{ { MiscRegNum64(3, 3, 0, 0, 7), MISCREG_DCZID_EL0 }, { MiscRegNum64(3, 3, 4, 2, 0), MISCREG_NZCV }, { MiscRegNum64(3, 3, 4, 2, 1), MISCREG_DAIF }, + { MiscRegNum64(3, 3, 4, 2, 2), MISCREG_SVCR }, { MiscRegNum64(3, 3, 4, 4, 0), MISCREG_FPCR }, { MiscRegNum64(3, 3, 4, 4, 1), MISCREG_FPSR }, { MiscRegNum64(3, 3, 4, 5, 0), MISCREG_DSPSR_EL0 }, @@ -1057,6 +1062,7 @@ std::unordered_map miscRegNumToIdx{ { MiscRegNum64(3, 3, 9, 14, 3), MISCREG_PMOVSSET_EL0 }, { MiscRegNum64(3, 3, 13, 0, 2), MISCREG_TPIDR_EL0 }, { MiscRegNum64(3, 3, 13, 0, 3), MISCREG_TPIDRRO_EL0 }, + { MiscRegNum64(3, 3, 13, 0, 5), MISCREG_TPIDR2_EL0 }, { MiscRegNum64(3, 3, 14, 0, 0), MISCREG_CNTFRQ_EL0 }, { MiscRegNum64(3, 3, 14, 0, 1), MISCREG_CNTPCT_EL0 }, { MiscRegNum64(3, 3, 14, 0, 2), MISCREG_CNTVCT_EL0 }, @@ -1087,8 +1093,13 @@ std::unordered_map miscRegNumToIdx{ { MiscRegNum64(3, 4, 1, 1, 1), MISCREG_MDCR_EL2 }, { MiscRegNum64(3, 4, 1, 1, 2), MISCREG_CPTR_EL2 }, { MiscRegNum64(3, 4, 1, 1, 3), MISCREG_HSTR_EL2 }, + { MiscRegNum64(3, 4, 1, 1, 4), MISCREG_HFGRTR_EL2 }, + { MiscRegNum64(3, 4, 1, 1, 5), MISCREG_HFGWTR_EL2 }, { MiscRegNum64(3, 4, 1, 1, 7), MISCREG_HACR_EL2 }, { MiscRegNum64(3, 4, 1, 2, 0), MISCREG_ZCR_EL2 }, + { MiscRegNum64(3, 4, 1, 2, 2), MISCREG_HCRX_EL2 }, + { MiscRegNum64(3, 4, 1, 2, 5), 
MISCREG_SMPRIMAP_EL2 }, + { MiscRegNum64(3, 4, 1, 2, 6), MISCREG_SMCR_EL2 }, { MiscRegNum64(3, 4, 2, 0, 0), MISCREG_TTBR0_EL2 }, { MiscRegNum64(3, 4, 2, 0, 1), MISCREG_TTBR1_EL2 }, { MiscRegNum64(3, 4, 2, 0, 2), MISCREG_TCR_EL2 }, @@ -1167,6 +1178,7 @@ std::unordered_map miscRegNumToIdx{ { MiscRegNum64(3, 5, 1, 0, 0), MISCREG_SCTLR_EL12 }, { MiscRegNum64(3, 5, 1, 0, 2), MISCREG_CPACR_EL12 }, { MiscRegNum64(3, 5, 1, 2, 0), MISCREG_ZCR_EL12 }, + { MiscRegNum64(3, 5, 1, 2, 6), MISCREG_SMCR_EL12 }, { MiscRegNum64(3, 5, 2, 0, 0), MISCREG_TTBR0_EL12 }, { MiscRegNum64(3, 5, 2, 0, 1), MISCREG_TTBR1_EL12 }, { MiscRegNum64(3, 5, 2, 0, 2), MISCREG_TCR_EL12 }, @@ -1193,6 +1205,7 @@ std::unordered_map miscRegNumToIdx{ { MiscRegNum64(3, 6, 1, 1, 1), MISCREG_SDER32_EL3 }, { MiscRegNum64(3, 6, 1, 1, 2), MISCREG_CPTR_EL3 }, { MiscRegNum64(3, 6, 1, 2, 0), MISCREG_ZCR_EL3 }, + { MiscRegNum64(3, 6, 1, 2, 6), MISCREG_SMCR_EL3 }, { MiscRegNum64(3, 6, 1, 3, 1), MISCREG_MDCR_EL3 }, { MiscRegNum64(3, 6, 2, 0, 0), MISCREG_TTBR0_EL3 }, { MiscRegNum64(3, 6, 2, 0, 2), MISCREG_TCR_EL3 }, @@ -4932,6 +4945,30 @@ ISA::initializeMiscRegMetadata() .fault(EL3, faultZcrEL3) .allPrivileges().exceptUserMode(); + // SME + InitReg(MISCREG_ID_AA64SMFR0_EL1) + .allPrivileges().exceptUserMode().writes(0); + InitReg(MISCREG_SVCR) + .allPrivileges(); + InitReg(MISCREG_SMIDR_EL1) + .allPrivileges().exceptUserMode().writes(0); + InitReg(MISCREG_SMPRI_EL1) + .allPrivileges().exceptUserMode().reads(1); + InitReg(MISCREG_SMPRIMAP_EL2) + .hyp().mon(); + InitReg(MISCREG_SMCR_EL3) + .mon(); + InitReg(MISCREG_SMCR_EL2) + .hyp().mon(); + InitReg(MISCREG_SMCR_EL12) + .allPrivileges().exceptUserMode(); + InitReg(MISCREG_SMCR_EL1) + .allPrivileges().exceptUserMode(); + InitReg(MISCREG_TPIDR2_EL0) + .allPrivileges(); + InitReg(MISCREG_MPAMSM_EL1) + .allPrivileges().exceptUserMode(); + // Dummy registers InitReg(MISCREG_NOP) .allPrivileges(); @@ -4979,6 +5016,19 @@ ISA::initializeMiscRegMetadata() .warnNotFail() 
.fault(faultUnimplemented); + // HCX extension (unimplemented) + InitReg(MISCREG_HCRX_EL2) + .unimplemented() + .warnNotFail(); + + // FGT extension (unimplemented) + InitReg(MISCREG_HFGRTR_EL2) + .unimplemented() + .warnNotFail(); + InitReg(MISCREG_HFGWTR_EL2) + .unimplemented() + .warnNotFail(); + // Register mappings for some unimplemented registers: // ESR_EL1 -> DFSR // RMR_EL1 -> RMR diff --git a/src/arch/arm/regs/misc.hh b/src/arch/arm/regs/misc.hh index 0b61eec2d6..999993b0de 100644 --- a/src/arch/arm/regs/misc.hh +++ b/src/arch/arm/regs/misc.hh @@ -1062,6 +1062,19 @@ namespace ArmISA MISCREG_ZCR_EL12, MISCREG_ZCR_EL1, + // SME + MISCREG_ID_AA64SMFR0_EL1, + MISCREG_SVCR, + MISCREG_SMIDR_EL1, + MISCREG_SMPRI_EL1, + MISCREG_SMPRIMAP_EL2, + MISCREG_SMCR_EL3, + MISCREG_SMCR_EL2, + MISCREG_SMCR_EL12, + MISCREG_SMCR_EL1, + MISCREG_TPIDR2_EL0, + MISCREG_MPAMSM_EL1, + // NUM_PHYS_MISCREGS specifies the number of actual physical // registers, not considering the following pseudo-registers // (dummy registers), like MISCREG_UNKNOWN, MISCREG_IMPDEF_UNIMPL. 
@@ -1092,6 +1105,13 @@ namespace ArmISA MISCREG_VSESR_EL2, MISCREG_VDISR_EL2, + // HCX extension (unimplemented) + MISCREG_HCRX_EL2, + + // FGT extension (unimplemented) + MISCREG_HFGRTR_EL2, + MISCREG_HFGWTR_EL2, + // PSTATE MISCREG_PAN, MISCREG_UAO, @@ -2684,6 +2704,18 @@ namespace ArmISA "zcr_el12", "zcr_el1", + "id_aa64smfr0_el1", + "svcr", + "smidr_el1", + "smpri_el1", + "smprimap_el2", + "smcr_el3", + "smcr_el2", + "smcr_el12", + "smcr_el1", + "tpidr2_el0", + "mpamsm_el1", + "num_phys_regs", // Dummy registers @@ -2702,6 +2734,9 @@ namespace ArmISA "disr_el1", "vsesr_el2", "vdisr_el2", + "hcrx_el2", + "hfgrtr_el2", + "hfgwtr_el2", // PSTATE "pan", diff --git a/src/arch/arm/regs/misc_types.hh b/src/arch/arm/regs/misc_types.hh index 05bf19bf5a..0fe5a00ae8 100644 --- a/src/arch/arm/regs/misc_types.hh +++ b/src/arch/arm/regs/misc_types.hh @@ -416,6 +416,7 @@ namespace ArmISA Bitfield<21, 20> cp10; Bitfield<21, 20> fpen; // AArch64 Bitfield<23, 22> cp11; + Bitfield<25, 24> smen; // SME Bitfield<25, 24> cp12; Bitfield<27, 26> cp13; Bitfield<29, 28> rsvd; @@ -734,10 +735,14 @@ namespace ArmISA Bitfield<31> tcpac; Bitfield<30> tam; Bitfield<28> tta_e2h; + Bitfield<25, 24> smen; Bitfield<21, 20> fpen; Bitfield<20> tta; Bitfield<17, 16> zen; - Bitfield<13, 12> res1_13_12_el2; + Bitfield<13, 13> res1_13_el2; + Bitfield<12, 12> res1_12_el2; + Bitfield<12> esm; // SME (CPTR_EL3) + Bitfield<12> tsm; // SME (CPTR_EL2) Bitfield<10> tfp; Bitfield<9> res1_9_el2; Bitfield<8> res1_8_el2; @@ -750,6 +755,34 @@ namespace ArmISA Bitfield<3, 0> len; EndBitUnion(ZCR) + BitUnion64(SMCR) + Bitfield<63, 32> res0_63_32; + Bitfield<31, 31> fa64; + Bitfield<30, 9> res0_30_9; + Bitfield<8, 4> razwi_8_4; + Bitfield<3, 0> len; + EndBitUnion(SMCR) + + BitUnion64(SVCR) + Bitfield<63, 2> res0_63_2; + Bitfield<1, 1> za; + Bitfield<0, 0> sm; + EndBitUnion(SVCR) + + BitUnion64(SMIDR) + Bitfield<63, 32> res0_63_32; + Bitfield<31, 24> implementer; + Bitfield<23, 16> revision; + Bitfield<15, 15> smps; 
+ Bitfield<14, 12> res0_14_12; + Bitfield<11, 0> affinity; + EndBitUnion(SMIDR) + + BitUnion64(SMPRI) + Bitfield<63, 4> res0_63_4; + Bitfield<3, 0> priority; + EndBitUnion(SMPRI) + BitUnion32(OSL) Bitfield<64, 4> res0; Bitfield<3> oslm_3; diff --git a/src/arch/arm/system.cc b/src/arch/arm/system.cc index 747695f16e..d6e28e70d9 100644 --- a/src/arch/arm/system.cc +++ b/src/arch/arm/system.cc @@ -78,6 +78,7 @@ ArmSystem::ArmSystem(const Params &p) _physAddrRange64(p.phys_addr_range_64), _haveLargeAsid64(p.have_large_asid_64), _sveVL(p.sve_vl), + _smeVL(p.sme_vl), semihosting(p.semihosting), release(p.release), multiProc(p.multi_proc) diff --git a/src/arch/arm/system.hh b/src/arch/arm/system.hh index a712615b6e..7b0e6be322 100644 --- a/src/arch/arm/system.hh +++ b/src/arch/arm/system.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2012-2013, 2015-2021 ARM Limited + * Copyright (c) 2010, 2012-2013, 2015-2022 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -128,6 +128,9 @@ class ArmSystem : public System /** SVE vector length at reset, in quadwords */ const unsigned _sveVL; + /** SME vector length at reset, in quadwords */ + const unsigned _smeVL; + /** * True if the Semihosting interface is enabled. 
*/ @@ -205,6 +208,9 @@ class ArmSystem : public System /** Returns the SVE vector length at reset, in quadwords */ unsigned sveVL() const { return _sveVL; } + /** Returns the SME vector length at reset, in quadwords */ + unsigned smeVL() const { return _smeVL; } + /** Returns the supported physical address range in bits if the highest * implemented exception level is 64 bits (ARMv8) */ uint8_t physAddrRange64() const { return _physAddrRange64; } From 72e4f614a2ecf81fa4053d973805276da451f539 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 3 Aug 2022 14:54:04 +0100 Subject: [PATCH 147/492] arch-arm: Add interfaces to set and get SME vector length We add interfaces which roughly mirror those already present for manipulating the SVE vector lengths to set/get the SME vector length. In the case of the SME vector length we also need to do some checking to ensure that the vector length itself is aligned to a whole power of two (one of the SME requirements). Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289 Change-Id: Ib89a4804466f5445adea6de8d65df512e366d618 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64336 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Reviewed-by: Giacomo Travaglini Tested-by: kokoro --- src/arch/arm/decoder.cc | 4 ++ src/arch/arm/decoder.hh | 12 ++++++ src/arch/arm/insts/static_inst.cc | 8 ++++ src/arch/arm/insts/static_inst.hh | 15 +++++++ src/arch/arm/isa.cc | 67 +++++++++++++++++++++++++++++-- src/arch/arm/isa.hh | 4 ++ 6 files changed, 106 insertions(+), 4 deletions(-) diff --git a/src/arch/arm/decoder.cc b/src/arch/arm/decoder.cc index c315ecfefb..9fc4be0e9a 100644 --- a/src/arch/arm/decoder.cc +++ b/src/arch/arm/decoder.cc @@ -67,6 +67,10 @@ Decoder::Decoder(const ArmDecoderParams &params) sveLen = (safe_cast(params.isa)-> getCurSveVecLenInBitsAtReset() >> 7) - 1; + // Initialize SME vector length + smeLen = (safe_cast(params.isa) + ->getCurSmeVecLenInBitsAtReset() >> 7) - 1; + if (dvmEnabled) {
warn_once( "DVM Ops instructions are micro-architecturally " diff --git a/src/arch/arm/decoder.hh b/src/arch/arm/decoder.hh index 8e486a3458..83690936c0 100644 --- a/src/arch/arm/decoder.hh +++ b/src/arch/arm/decoder.hh @@ -85,6 +85,12 @@ class Decoder : public InstDecoder */ int sveLen; + /** + * SME vector length, encoded in the same format as the SMCR_EL<x>.LEN + * bitfields. + */ + int smeLen; + enums::DecoderFlavor decoderFlavor; /// A cache of decoded instruction objects. @@ -158,6 +164,12 @@ class Decoder : public InstDecoder { sveLen = len; } + + void + setSmeLen(uint8_t len) + { + smeLen = len; + } }; } // namespace ArmISA diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc index c07fb3922a..446f2afd18 100644 --- a/src/arch/arm/insts/static_inst.cc +++ b/src/arch/arm/insts/static_inst.cc @@ -1233,5 +1233,13 @@ ArmStaticInst::getCurSveVecLenInBits(ThreadContext *tc) return isa->getCurSveVecLenInBits(); } +unsigned +ArmStaticInst::getCurSmeVecLenInBits(ThreadContext *tc) +{ + auto *isa = static_cast(tc->getIsaPtr()); + return isa->getCurSmeVecLenInBits(); +} + + } // namespace ArmISA } // namespace gem5 diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh index fa58f98de9..3b67e6b253 100644 --- a/src/arch/arm/insts/static_inst.hh +++ b/src/arch/arm/insts/static_inst.hh @@ -583,6 +583,21 @@ class ArmStaticInst : public StaticInst return getCurSveVecLenInBits(tc) / (8 * sizeof(T)); } + static unsigned getCurSmeVecLenInBits(ThreadContext *tc); + + static unsigned + getCurSmeVecLenInQWords(ThreadContext *tc) + { + return getCurSmeVecLenInBits(tc) >> 6; + } + + template + static unsigned + getCurSmeVecLen(ThreadContext *tc) + { + return getCurSmeVecLenInBits(tc) / (8 * sizeof(T)); + } + inline Fault undefined(bool disabled=false) const { diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index 78a1f4fc9f..aec824387b 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -1162,6 +1162,8 @@
ISA::setMiscReg(RegIndex idx, RegVal val) tc->getDecoderPtr()->as().setSveLen( (getCurSveVecLenInBits() >> 7) - 1); + tc->getDecoderPtr()->as().setSmeLen( + (getCurSmeVecLenInBits() >> 7) - 1); // Follow slightly different semantics if a CheckerCPU object // is connected @@ -2069,11 +2071,11 @@ ISA::setMiscReg(RegIndex idx, RegVal val) case MISCREG_SMCR_EL2: case MISCREG_SMCR_EL1: // Set the value here as we need to update the regs before - // reading them back in getCurSmeVecLenInBits (not - // implemented yet) to avoid setting stale vector lengths in - // the decoder. + // reading them back in getCurSmeVecLenInBits to avoid + // setting stale vector lengths in the decoder. setMiscRegNoEffect(idx, newVal); - // TODO: set the SME vector length + tc->getDecoderPtr()->as().setSmeLen( + (getCurSmeVecLenInBits() >> 7) - 1); return; } setMiscRegNoEffect(idx, newVal); @@ -2161,6 +2163,13 @@ ISA::currEL() const unsigned ISA::getCurSveVecLenInBits() const { + SVCR svcr = miscRegs[MISCREG_SVCR]; + // If we are in Streaming Mode, we should return the Streaming Mode vector + // length instead. 
+ if (svcr.sm) { + return getCurSmeVecLenInBits(); + } + if (!FullSystem) { return sveVL * 128; } @@ -2202,6 +2211,56 @@ ISA::getCurSveVecLenInBits() const return (len + 1) * 128; } +unsigned +ISA::getCurSmeVecLenInBits() const +{ + if (!FullSystem) { + return smeVL * 128; + } + + panic_if(!tc, + "A ThreadContext is needed to determine the SME vector length " + "in full-system mode"); + + CPSR cpsr = miscRegs[MISCREG_CPSR]; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + + unsigned len = 0; + + if (el == EL1 || (el == EL0 && !ELIsInHost(tc, el))) { + len = static_cast(miscRegs[MISCREG_SMCR_EL1]).len; + } + + if (el == EL2 || (el == EL0 && ELIsInHost(tc, el))) { + len = static_cast(miscRegs[MISCREG_SMCR_EL2]).len; + } else if (release->has(ArmExtension::VIRTUALIZATION) && !isSecure(tc) && + (el == EL0 || el == EL1)) { + len = std::min( + len, + static_cast( + static_cast(miscRegs[MISCREG_SMCR_EL2]).len)); + } + + if (el == EL3) { + len = static_cast(miscRegs[MISCREG_SMCR_EL3]).len; + } else if (release->has(ArmExtension::SECURITY)) { + len = std::min( + len, + static_cast( + static_cast(miscRegs[MISCREG_SMCR_EL3]).len)); + } + + len = std::min(len, smeVL - 1); + + // len + 1 must be a power of 2! Round down to the nearest whole power of + // two. 
+ static const unsigned LUT[16] = {0, 1, 1, 3, 3, 3, 3, 7, + 7, 7, 7, 7, 7, 7, 7, 15}; + len = LUT[len]; + + return (len + 1) * 128; +} + void ISA::serialize(CheckpointOut &cp) const { diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index bc0ab7683e..512799feed 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -367,6 +367,10 @@ namespace ArmISA unsigned getCurSveVecLenInBitsAtReset() const { return sveVL * 128; } + unsigned getCurSmeVecLenInBits() const; + + unsigned getCurSmeVecLenInBitsAtReset() const { return smeVL * 128; } + template static void zeroSveVecRegUpperPart(Elem *v, unsigned eCount) From 142d562b2f3df36401f3a5b2b68ef6026a599717 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 3 Aug 2022 14:57:33 +0100 Subject: [PATCH 148/492] arch-arm: Implement SME access traps and extend the SVE ones We add the SME access checks and trapping, which roughly mirrors that used by SVE. SME adds a new mode called streaming mode. When a core is in streaming mode the behaviour of the SVE instructions changes such that they check the SME traps and enables as opposed to the SVE ones. We therefore update the existing SVE trap/access checking code to check the SME equivalents when a core is in streaming mode. Else, the original behaviour is preserved. 
Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289 Change-Id: I7eba70da9d41d2899b753fababbd6074ed732501 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64337 Reviewed-by: Giacomo Travaglini Maintainer: Giacomo Travaglini Tested-by: kokoro --- src/arch/arm/insts/static_inst.cc | 127 +++++++++++++++++++++++++++++ src/arch/arm/insts/static_inst.hh | 29 +++++++ src/arch/arm/isa/templates/sve.isa | 4 +- src/arch/arm/types.hh | 1 + 4 files changed, 160 insertions(+), 1 deletion(-) diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc index 446f2afd18..54045f2fb1 100644 --- a/src/arch/arm/insts/static_inst.cc +++ b/src/arch/arm/insts/static_inst.cc @@ -1026,6 +1026,13 @@ ArmStaticInst::sveAccessTrap(ExceptionLevel el) const Fault ArmStaticInst::checkSveEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const { + // We first check if we are in streaming mode or not. If we are in + // streaming mode, we actually check the SME traps, not the SVE traps! 
+ SVCR svcr_sm_check = tc->readMiscReg(MISCREG_SVCR); + if (svcr_sm_check.sm) { + return checkSmeEnabled(tc, cpsr, cpacr); + } + const ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; // Check if access disabled in CPACR_EL1 if (el <= EL1 && !ELIsInHost(tc, el)) { @@ -1073,6 +1080,126 @@ ArmStaticInst::checkSveEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const return NoFault; } +Fault +ArmStaticInst::smeAccessTrap(ExceptionLevel el, uint32_t iss) const +{ + switch (el) { + case EL1: + return std::make_shared( + machInst, iss, ExceptionClass::TRAPPED_SME); + case EL2: + return std::make_shared( + machInst, iss, ExceptionClass::TRAPPED_SME); + case EL3: + return std::make_shared( + machInst, iss, ExceptionClass::TRAPPED_SME); + + default: + panic("Illegal EL in smeAccessTrap\n"); + } +} + +Fault +ArmStaticInst::checkSmeEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const +{ + const ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + // Check if access disabled in CPACR_EL1 + if (el <= EL1 && !ELIsInHost(tc, el)) { + if ((el == EL0 && cpacr.smen == 0x1) || + (!(cpacr.smen & 0x1))) + return smeAccessTrap(EL1); + + if ((el == EL0 && cpacr.fpen == 0x1) || + (!(cpacr.fpen & 0x1))) + return advSIMDFPAccessTrap64(EL1); + } + + // Check if access disabled in CPTR_EL2 + if (el <= EL2 && EL2Enabled(tc)) { + CPTR cptr_en_check = tc->readMiscReg(MISCREG_CPTR_EL2); + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + if (HaveExt(tc, ArmExtension::FEAT_VHE) && hcr.e2h) { + if (((cptr_en_check.smen & 0x1) == 0x0) || + (cptr_en_check.smen == 0x1 && el == EL0 && + hcr.tge == 0x1)) { + return smeAccessTrap(EL2); + } + if (((cptr_en_check.fpen & 0x1) == 0x0) || + (cptr_en_check.fpen == 0x1 && el == EL0 && + hcr.tge == 0x1)) { + return advSIMDFPAccessTrap64(EL2); + } + } else { + if (cptr_en_check.tsm == 1) + return smeAccessTrap(EL2); + if (cptr_en_check.tfp == 1) + return advSIMDFPAccessTrap64(EL2); + } + } + + // Check if access disabled in CPTR_EL3 + if 
(ArmSystem::haveEL(tc, EL3)) { + CPTR cptr_en_check = tc->readMiscReg(MISCREG_CPTR_EL3); + if (!cptr_en_check.esm) + return smeAccessTrap(EL3); + if (cptr_en_check.tfp) + return advSIMDFPAccessTrap64(EL3); + } + + return NoFault; +} + +Fault +ArmStaticInst::checkSmeAccess(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const +{ + const ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + // Check if access disabled in CPACR_EL1 + if (el <= EL1 && !ELIsInHost(tc, el)) { + if ((el == EL0 && cpacr.smen == 0x1) || (!(cpacr.smen & 0x1))) { + return smeAccessTrap(EL1); + } + } + + // Check if access disabled in CPTR_EL2 + if (el <= EL2 && EL2Enabled(tc)) { + CPTR cptr_en_check = tc->readMiscReg(MISCREG_CPTR_EL2); + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + if (HaveExt(tc, ArmExtension::FEAT_VHE) && hcr.e2h) { + if (((cptr_en_check.smen & 0x1) == 0x0) || + (cptr_en_check.smen == 0x1 && el == EL0 && + hcr.tge == 0x1)) { + return smeAccessTrap(EL2); + } + } else { + if (cptr_en_check.tsm == 1) + return smeAccessTrap(EL2); + } + } + + // Check if access disabled in CPTR_EL3 + if (ArmSystem::haveEL(tc, EL3)) { + CPTR cptr_en_check = tc->readMiscReg(MISCREG_CPTR_EL3); + if (!cptr_en_check.esm) + return smeAccessTrap(EL3); + } + + return NoFault; +} + +Fault +ArmStaticInst::checkSveSmeEnabled(ThreadContext *tc, CPSR cpsr, + CPACR cpacr) const +{ + // If we are not in streaming mode, check the SVE traps, else check the SME + // traps. 
+ SVCR svcr = tc->readMiscReg(MISCREG_SVCR); + if (!svcr.sm) { + return checkSveEnabled(tc, cpsr, cpacr); + } else { + return checkSmeEnabled(tc, cpsr, cpacr); + } +} + static uint8_t getRestoredITBits(ThreadContext *tc, CPSR spsr) { diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh index 3b67e6b253..cc96dd9269 100644 --- a/src/arch/arm/insts/static_inst.hh +++ b/src/arch/arm/insts/static_inst.hh @@ -513,6 +513,35 @@ class ArmStaticInst : public StaticInst */ Fault checkSveEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const; + + /** + * Trap an access to SME registers due to access control bits. + * + * @param el Target EL for the trap. + * @param iss ISS to be used for the trap. + */ + Fault smeAccessTrap(ExceptionLevel el, uint32_t iss = 0) const; + + /** + * Check if SME is enabled by checking the SME and FP bits of + * CPACR_EL1, CPTR_EL2, and CPTR_EL3 + */ + Fault checkSmeEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const; + + /** + * Check an SME access against CPACR_EL1, CPTR_EL2, and CPTR_EL3. + * This is purely used from the management instructions as it should + * be possible to call SMSTART/SMSTOP without having the floating + * point flags correctly set up. + */ + Fault checkSmeAccess(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const; + + /** + * Check an SVE access against CPACR_EL1, CPTR_EL2, and CPTR_EL3, but + * choosing the correct set of traps to check based on Streaming Mode + */ + Fault checkSveSmeEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const; + /** * Get the new PSTATE from a SPSR register in preparation for an * exception return. 
diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa index 87316f1440..fc38a2b979 100644 --- a/src/arch/arm/isa/templates/sve.isa +++ b/src/arch/arm/isa/templates/sve.isa @@ -36,7 +36,9 @@ let {{ sveEnabledCheckCode = ''' if (FullSystem) { - fault = this->checkSveEnabled(xc->tcBase(), Cpsr, Cpacr64); + // Check an SVE inst against the appropriate traps/enables based + // on the Streaming Mode. + fault = this->checkSveSmeEnabled(xc->tcBase(), Cpsr, Cpacr64); if (fault != NoFault) { return fault; } diff --git a/src/arch/arm/types.hh b/src/arch/arm/types.hh index 50db9bc9af..2251d57c0b 100644 --- a/src/arch/arm/types.hh +++ b/src/arch/arm/types.hh @@ -323,6 +323,7 @@ namespace ArmISA SMC_64 = 0x17, TRAPPED_MSR_MRS_64 = 0x18, TRAPPED_SVE = 0x19, + TRAPPED_SME = 0x1D, PREFETCH_ABORT_TO_HYP = 0x20, PREFETCH_ABORT_LOWER_EL = 0x20, // AArch64 alias PREFETCH_ABORT_FROM_HYP = 0x21, From fe8eda9c4ee53dec780463b3506c3bba30a57da9 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 3 Aug 2022 15:38:46 +0100 Subject: [PATCH 149/492] arch, arch-arm, cpu: Add matrix reg support to the ISA Parser The ISA parser now emits the code required to access matrix registers. In the case where a register is both a source and a destination, the ISA parser generates appropriate code to make sure that the contents of the source is copied to the destination. This is required for the O3 CPU which treats these as two different physical registers, and hence data is lost if not explicitly preserved. 
Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289 Change-Id: I8796bd1ea55b5edf5fb8ab92ef1a6060ccc58fa1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64338 Maintainer: Giacomo Travaglini Tested-by: kokoro Reviewed-by: Giacomo Travaglini --- src/arch/arm/isa/operands.isa | 8 ++++ src/arch/isa_parser/isa_parser.py | 5 ++- src/arch/isa_parser/operand_types.py | 65 +++++++++++++++++++++++++++- src/cpu/FuncUnit.py | 5 ++- src/cpu/minor/BaseMinorCPU.py | 3 ++ src/cpu/op_class.hh | 5 ++- 6 files changed, 87 insertions(+), 4 deletions(-) diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index 2addd10def..5919ae974e 100644 --- a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -53,6 +53,7 @@ def operand_types {{ 'sf' : 'float', 'df' : 'double', 'vc' : 'ArmISA::VecRegContainer', + 'mc' : 'ArmISA::MatRegContainer', # For operations that are implemented as a template 'x' : 'TPElem', 'xs' : 'TPSElem', @@ -99,6 +100,10 @@ let {{ def __init__(self, idx): super().__init__('pc', idx, sort_pri=srtNormal) + class MatrixReg(MatRegOp): + def __init__(self, idx, suffix=''): + super().__init__('mc', idx, 'IsMatrix', srtNormal) + class IntRegNPC(IntRegOp): @overrideInOperand def regId(self): @@ -454,6 +459,9 @@ def operands {{ 'FfrAux': VecPredReg('PREDREG_FFR'), 'PUreg0': VecPredReg('PREDREG_UREG0'), + # SME ZA Register: + 'ZA': MatrixReg('0'), + #Abstracted control reg operands 'MiscDest': CntrlReg('dest'), 'MiscOp1': CntrlReg('op1'), diff --git a/src/arch/isa_parser/isa_parser.py b/src/arch/isa_parser/isa_parser.py index aff3c9f63c..39b50f06b6 100755 --- a/src/arch/isa_parser/isa_parser.py +++ b/src/arch/isa_parser/isa_parser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2014, 2016, 2018-2019 ARM Limited +# Copyright (c) 2014, 2016, 2018-2019, 2022 ARM Limited # All rights reserved # # The license below extends only to copyright in the software and shall @@ -481,6 +481,8 @@ class InstObjParams(object): self.op_class = 
"FloatAddOp" elif "IsVector" in self.flags: self.op_class = "SimdAddOp" + elif "IsMatrix" in self.flags: + self.op_class = "MatrixOp" else: self.op_class = "IntAluOp" @@ -564,6 +566,7 @@ class ISAParser(Grammar): "VecElemOp": VecElemOperandDesc, "VecRegOp": VecRegOperandDesc, "VecPredRegOp": VecPredRegOperandDesc, + "MatRegOp": MatRegOperandDesc, "ControlRegOp": ControlRegOperandDesc, "MemOp": MemOperandDesc, "PCStateOp": PCStateOperandDesc, diff --git a/src/arch/isa_parser/operand_types.py b/src/arch/isa_parser/operand_types.py index 63ca765a09..4786f88774 100755 --- a/src/arch/isa_parser/operand_types.py +++ b/src/arch/isa_parser/operand_types.py @@ -1,4 +1,4 @@ -# Copyright (c) 2014, 2016, 2018-2019 ARM Limited +# Copyright (c) 2014, 2016, 2018-2019, 2022 ARM Limited # All rights reserved # # The license below extends only to copyright in the software and shall @@ -447,6 +447,69 @@ class VecPredRegOperandDesc(RegOperandDesc): super().__init__("vecPredRegClass", VecPredRegOperand, *args, **kwargs) +class MatRegOperand(RegOperand): + reg_class = "MatRegClass" + + def __init__(self, parser, full_name, ext, is_src, is_dest): + super().__init__(parser, full_name, ext, is_src, is_dest) + + def makeDecl(self): + return "" + + def makeReadW(self): + c_readw = ( + f"\t\tauto &tmp_d{self.dest_reg_idx} = \n" + f"\t\t *({self.parser.namespace}::MatRegContainer *)\n" + f"\t\t xc->getWritableRegOperand(this, \n" + f"\t\t {self.dest_reg_idx});\n" + f"\t\tauto &{self.base_name} = tmp_d{self.dest_reg_idx};\n" + ) + + return c_readw + + def makeRead(self): + name = self.base_name + if self.is_dest and self.is_src: + name += "_merger" + + c_read = ( + f"\t\t{self.parser.namespace}::MatRegContainer " + f"\t\t tmp_s{self.src_reg_idx};\n" + f"\t\txc->getRegOperand(this, {self.src_reg_idx},\n" + f"\t\t &tmp_s{self.src_reg_idx});\n" + f"\t\tauto &{name} = tmp_s{self.src_reg_idx};\n" + ) + + # The following is required due to the way that the O3 CPU + # works. 
The ZA register is seen as two physical registers; one + # for reading from and one for writing to. We need to make sure + # to copy the data from the read-only copy to the writable + # reference (the destination). Failure to do this results in + # data loss for the O3 CPU. Other CPU models don't appear to + # require this. + if self.is_dest and self.is_src: + c_read += f"{self.base_name} = {name};" + + return c_read + + def makeWrite(self): + return f""" + if (traceData) {{ + traceData->setData({self.reg_class}, &tmp_d{self.dest_reg_idx}); + }} + """ + + def finalize(self): + super().finalize() + if self.is_dest: + self.op_rd = self.makeReadW() + self.op_rd + + +class MatRegOperandDesc(RegOperandDesc): + def __init__(self, *args, **kwargs): + super().__init__("matRegClass", MatRegOperand, *args, **kwargs) + + class ControlRegOperand(Operand): reg_class = "miscRegClass" diff --git a/src/cpu/FuncUnit.py b/src/cpu/FuncUnit.py index c5137ac970..4a2733afc0 100644 --- a/src/cpu/FuncUnit.py +++ b/src/cpu/FuncUnit.py @@ -1,4 +1,4 @@ -# Copyright (c) 2010, 2017-2018 ARM Limited +# Copyright (c) 2010, 2017-2018, 2022 ARM Limited # All rights reserved. 
# # The license below extends only to copyright in the software and shall @@ -89,6 +89,9 @@ class OpClass(Enum): "SimdShaSigma2", "SimdShaSigma3", "SimdPredAlu", + "Matrix", + "MatrixMov", + "MatrixOP", "MemRead", "MemWrite", "FloatMemRead", diff --git a/src/cpu/minor/BaseMinorCPU.py b/src/cpu/minor/BaseMinorCPU.py index bcdab1bad5..6641a39b4e 100644 --- a/src/cpu/minor/BaseMinorCPU.py +++ b/src/cpu/minor/BaseMinorCPU.py @@ -215,6 +215,9 @@ class MinorDefaultFloatSimdFU(MinorFU): "SimdSha256Hash2", "SimdShaSigma2", "SimdShaSigma3", + "Matrix", + "MatrixMov", + "MatrixOP", ] ) diff --git a/src/cpu/op_class.hh b/src/cpu/op_class.hh index 94730f3d5d..4de018f21b 100644 --- a/src/cpu/op_class.hh +++ b/src/cpu/op_class.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2017-2018 ARM Limited + * Copyright (c) 2010, 2017-2018, 2022 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -99,6 +99,9 @@ static const OpClass SimdSha256Hash2Op = enums::SimdSha256Hash2; static const OpClass SimdShaSigma2Op = enums::SimdShaSigma2; static const OpClass SimdShaSigma3Op = enums::SimdShaSigma3; static const OpClass SimdPredAluOp = enums::SimdPredAlu; +static const OpClass MatrixOp = enums::Matrix; +static const OpClass MatrixMovOp = enums::MatrixMov; +static const OpClass MatrixOPOp = enums::MatrixOP; static const OpClass MemReadOp = enums::MemRead; static const OpClass MemWriteOp = enums::MemWrite; static const OpClass FloatMemReadOp = enums::FloatMemRead; From c694d8589f1023f6e565d83b1c636da6a9088bc0 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 3 Aug 2022 17:10:29 +0100 Subject: [PATCH 150/492] arch-arm, cpu: Implement instructions added by FEAT_SME We add the full set of instructions added by Arm's FEAT_SME, with the exception of BMOPA/BMOPS which are BrainFloat16-based outer product instructions. 
These have been omitted due to the lack of support for BF16 in fplib - the software FP library used for the Arm ISA implementation. The SMEv1 specification can be found at the following location: https://developer.arm.com/documentation/ddi0616/latest Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289 Change-Id: I4882ab452bfc48770419860f89f1f60c7af8aceb Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64339 Reviewed-by: Giacomo Travaglini Tested-by: kokoro Maintainer: Giacomo Travaglini --- src/arch/arm/SConscript | 1 + src/arch/arm/insts/sme.cc | 183 +++++ src/arch/arm/insts/sme.hh | 229 ++++++ src/arch/arm/insts/sve.cc | 32 + src/arch/arm/insts/sve.hh | 41 + src/arch/arm/isa/formats/aarch64.isa | 37 +- src/arch/arm/isa/formats/formats.isa | 3 + src/arch/arm/isa/formats/sme.isa | 738 ++++++++++++++++++ src/arch/arm/isa/formats/sve_2nd_level.isa | 135 +++- src/arch/arm/isa/formats/sve_top_level.isa | 9 + src/arch/arm/isa/includes.isa | 1 + src/arch/arm/isa/insts/insts.isa | 3 + src/arch/arm/isa/insts/sme.isa | 821 +++++++++++++++++++++ src/arch/arm/isa/insts/sve.isa | 63 ++ src/arch/arm/isa/operands.isa | 5 + src/arch/arm/isa/templates/sme.isa | 773 +++++++++++++++++++ src/arch/arm/isa/templates/sve.isa | 53 ++ src/arch/arm/isa/templates/templates.isa | 3 + 18 files changed, 3103 insertions(+), 27 deletions(-) create mode 100644 src/arch/arm/insts/sme.cc create mode 100644 src/arch/arm/insts/sme.hh create mode 100644 src/arch/arm/isa/formats/sme.isa create mode 100644 src/arch/arm/isa/insts/sme.isa create mode 100644 src/arch/arm/isa/templates/sme.isa diff --git a/src/arch/arm/SConscript b/src/arch/arm/SConscript index 935f082c11..ee5efebf13 100644 --- a/src/arch/arm/SConscript +++ b/src/arch/arm/SConscript @@ -68,6 +68,7 @@ Source('insts/misc.cc', tags='arm isa') Source('insts/misc64.cc', tags='arm isa') Source('insts/pred_inst.cc', tags='arm isa') Source('insts/pseudo.cc', tags='arm isa') +Source('insts/sme.cc', tags='arm isa') 
Source('insts/static_inst.cc', tags='arm isa') Source('insts/sve.cc', tags='arm isa') Source('insts/sve_mem.cc', tags='arm isa') diff --git a/src/arch/arm/insts/sme.cc b/src/arch/arm/insts/sme.cc new file mode 100644 index 0000000000..305d332514 --- /dev/null +++ b/src/arch/arm/insts/sme.cc @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2022 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/arm/insts/sme.hh" + +namespace gem5 +{ + +namespace ArmISA +{ + +std::string +SmeAddOp::generateDisassembly(Addr pc, + const Loader::SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + ccprintf(ss, "#%d", imm); + ss << ", "; + printVecReg(ss, op1, true); + ss << ", "; + printVecPredReg(ss, gp1); + ss << ", "; + printVecPredReg(ss, gp2); + return ss.str(); +} + +std::string +SmeAddVlOp::generateDisassembly(Addr pc, + const Loader::SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + ss << ", "; + printVecReg(ss, dest); + ss << ", "; + printVecReg(ss, op1); + ss << ", "; + ccprintf(ss, "#%d", imm); + return ss.str(); +} + +std::string +SmeLd1xSt1xOp::generateDisassembly(Addr pc, + const Loader::SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + ccprintf(ss, "#%d", imm); + ss << ", "; + printIntReg(ss, op1); + ss << ", "; + printVecPredReg(ss, gp); + ss << ", "; + printIntReg(ss, op2); + ss << ", "; + printIntReg(ss, op3); + return ss.str(); +} + +std::string +SmeLdrStrOp::generateDisassembly(Addr pc, + const Loader::SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + ccprintf(ss, "#%d", imm); + ss << ", "; + printIntReg(ss, op1, true); + ss << ", "; + printIntReg(ss, op2, true); + return ss.str(); +} + +std::string +SmeMovExtractOp::generateDisassembly(Addr pc, + const 
Loader::SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, op1, true); + ss << ", "; + ccprintf(ss, "#%d", imm); + ss << ", "; + printVecPredReg(ss, gp); + ss << ", "; + printIntReg(ss, op2); + return ss.str(); +} + +std::string +SmeMovInsertOp::generateDisassembly(Addr pc, + const Loader::SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + ccprintf(ss, "#%d", imm); + ss << ", "; + printVecReg(ss, op1, true); + ss << ", "; + printVecPredReg(ss, gp); + ss << ", "; + printIntReg(ss, op2); + return ss.str(); +} + +std::string +SmeOPOp::generateDisassembly(Addr pc, + const Loader::SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + ccprintf(ss, "#%d", imm); + ss << ", "; + printVecPredReg(ss, gp1); + ss << ", "; + printVecPredReg(ss, gp2); + ss << ", "; + printVecReg(ss, op1, true); + ss << ", "; + printVecReg(ss, op2, true); + return ss.str(); +} + +std::string +SmeRdsvlOp::generateDisassembly(Addr pc, + const Loader::SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + ss << ", "; + printVecReg(ss, dest); + ss << ", "; + ccprintf(ss, "#%d", imm); + return ss.str(); +} + +std::string +SmeZeroOp::generateDisassembly(Addr pc, + const Loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ArmStaticInst::printMnemonic(ss, "", false); + ccprintf(ss, "#%d", imm); + return ss.str(); +} + +} // namespace ArmISA +} // namespace gem5 diff --git a/src/arch/arm/insts/sme.hh b/src/arch/arm/insts/sme.hh new file mode 100644 index 0000000000..d6cbdde5a7 --- /dev/null +++ b/src/arch/arm/insts/sme.hh @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2022 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware 
implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __ARCH_ARM_INSTS_SME_HH__ +#define __ARCH_ARM_INSTS_SME_HH__ + +#include "arch/arm/insts/static_inst.hh" + +namespace gem5 +{ + +namespace ArmISA +{ + +// Used for SME ADDHA/ADDVA +class SmeAddOp : public ArmStaticInst +{ + protected: + uint64_t imm; + RegIndex op1; + RegIndex gp1; + RegIndex gp2; + + SmeAddOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint64_t _imm, RegIndex _op1, + RegIndex _gp1, RegIndex _gp2) : + ArmStaticInst(mnem, _machInst, __opClass), + imm(_imm), op1(_op1), gp1(_gp1), gp2(_gp2) + {} + + std::string generateDisassembly( + Addr pc, const Loader::SymbolTable *symtab) const override; +}; + +// Used for the SME ADDSPL/ADDSVL instructions +class SmeAddVlOp : public ArmStaticInst +{ + protected: + RegIndex dest; + RegIndex op1; + int8_t imm; + + SmeAddVlOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, RegIndex _dest, RegIndex _op1, + int8_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm(_imm) + {} + + std::string generateDisassembly( + Addr pc, const Loader::SymbolTable *symtab) const override; +}; + +// Used for SME LD1x/ST1x instructions +class SmeLd1xSt1xOp : public ArmStaticInst +{ + protected: + uint64_t imm; + RegIndex op1; + RegIndex gp; + RegIndex op2; + RegIndex op3; + bool V; + + SmeLd1xSt1xOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint64_t _imm, RegIndex _op1, + RegIndex _gp, RegIndex _op2, + RegIndex _op3, bool _V) : + ArmStaticInst(mnem, _machInst, __opClass), + imm(_imm), op1(_op1), gp(_gp), op2(_op2), op3(_op3), V(_V) + {} + + std::string generateDisassembly( + Addr pc, const Loader::SymbolTable *symtab) const override; +}; + +// Used for SME LDR/STR instructions +class SmeLdrStrOp : public ArmStaticInst +{ + protected: + uint64_t imm; + RegIndex op1; + RegIndex op2; + + SmeLdrStrOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint64_t _imm, RegIndex _op1, + RegIndex _op2) : + ArmStaticInst(mnem, _machInst,
__opClass), + imm(_imm), op1(_op1), op2(_op2) + {} + + std::string generateDisassembly( + Addr pc, const Loader::SymbolTable *symtab) const override; +}; + +// Used for SME MOVA (Tile to Vector) +class SmeMovExtractOp : public ArmStaticInst +{ + protected: + RegIndex op1; + uint8_t imm; + RegIndex gp; + RegIndex op2; + bool v; + + SmeMovExtractOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, RegIndex _op1, uint8_t _imm, + RegIndex _gp, RegIndex _op2, bool _v) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), imm(_imm), gp(_gp), op2(_op2), v(_v) + {} + + std::string generateDisassembly( + Addr pc, const Loader::SymbolTable *symtab) const override; +}; + +// Used for SME MOVA (Vector to Tile) +class SmeMovInsertOp : public ArmStaticInst +{ + protected: + uint8_t imm; + RegIndex op1; + RegIndex gp; + RegIndex op2; + bool v; + + SmeMovInsertOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _imm, RegIndex _op1, + RegIndex _gp, RegIndex _op2, bool _v) : + ArmStaticInst(mnem, _machInst, __opClass), + imm(_imm), op1(_op1), gp(_gp), op2(_op2), v(_v) + {} + + std::string generateDisassembly( + Addr pc, const Loader::SymbolTable *symtab) const override; +}; + +// Used for SME outer product instructions +class SmeOPOp : public ArmStaticInst +{ + protected: + uint64_t imm; + RegIndex op1; + RegIndex gp1; + RegIndex gp2; + RegIndex op2; + + SmeOPOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + uint64_t _imm, RegIndex _op1, RegIndex _gp1, + RegIndex _gp2, RegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + imm(_imm), op1(_op1), gp1(_gp1), gp2(_gp2), op2(_op2) + {} + + std::string generateDisassembly( + Addr pc, const Loader::SymbolTable *symtab) const override; +}; + +// Used for the SME RDSVL instruction +class SmeRdsvlOp : public ArmStaticInst +{ + protected: + RegIndex dest; + int8_t imm; + + SmeRdsvlOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, RegIndex _dest, int8_t _imm) : +
ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly( + Addr pc, const Loader::SymbolTable *symtab) const override; +}; + +// Used for SME ZERO +class SmeZeroOp : public ArmStaticInst +{ + protected: + uint8_t imm; + + SmeZeroOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + imm(_imm) + {} + + std::string generateDisassembly( + Addr pc, const Loader::SymbolTable *symtab) const override; +}; + +} // namespace ArmISA +} // namespace gem5 + +#endif // __ARCH_ARM_INSTS_SME_HH__ diff --git a/src/arch/arm/insts/sve.cc b/src/arch/arm/insts/sve.cc index 9a525b195d..9d9c2bcb1c 100644 --- a/src/arch/arm/insts/sve.cc +++ b/src/arch/arm/insts/sve.cc @@ -161,6 +161,24 @@ SveWhileOp::generateDisassembly( return ss.str(); } +std::string +SvePselOp::generateDisassembly(Addr pc, + const Loader::SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecPredReg(ss, dest); + ss << ", "; + printVecPredReg(ss, op1); + ss << ", "; + printVecPredReg(ss, gp); + ss << ", "; + printIntReg(ss, op2); + ss << ", "; + ccprintf(ss, "#%d", imm); + return ss.str(); +} + std::string SveCompTermOp::generateDisassembly( Addr pc, const loader::SymbolTable *symtab) const @@ -831,6 +849,20 @@ SveComplexIdxOp::generateDisassembly( return ss.str(); } +std::string +SveClampOp::generateDisassembly( + Addr pc, const Loader::SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ss << ", "; + printVecReg(ss, op1, true); + ss << ", "; + printVecReg(ss, op2, true); + return ss.str(); +} + std::string sveDisasmPredCountImm(uint8_t imm) { diff --git a/src/arch/arm/insts/sve.hh b/src/arch/arm/insts/sve.hh index f9939e1f22..63a59d493a 100644 --- a/src/arch/arm/insts/sve.hh +++ b/src/arch/arm/insts/sve.hh @@ -180,6 +180,28 @@ class SveWhileOp : public ArmStaticInst Addr pc, const 
loader::SymbolTable *symtab) const override; }; +/// Psel predicate selection SVE instruction. +class SvePselOp : public ArmStaticInst +{ + protected: + RegIndex dest; + RegIndex op1; + RegIndex gp; + RegIndex op2; + uint64_t imm; + + SvePselOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, RegIndex _dest, + RegIndex _op1, RegIndex _gp, + RegIndex _op2, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), gp(_gp), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly( + Addr pc, const Loader::SymbolTable *symtab) const override; +}; + /// Compare and terminate loop SVE instruction. class SveCompTermOp : public ArmStaticInst { @@ -951,6 +973,25 @@ class SveComplexIdxOp : public ArmStaticInst Addr pc, const loader::SymbolTable *symtab) const override; }; +// SVE2 SCLAMP/UCLAMP instructions +class SveClampOp : public ArmStaticInst +{ + protected: + RegIndex dest; + RegIndex op1; + RegIndex op2; + + SveClampOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, RegIndex _dest, + RegIndex _op1, RegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2) + {} + + std::string generateDisassembly( + Addr pc, const Loader::SymbolTable *symtab) const override; +}; + /// Returns the symbolic name associated with pattern `imm` for PTRUE(S) /// instructions. 
diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa index 37eb995bfd..2fd28f8209 100644 --- a/src/arch/arm/isa/formats/aarch64.isa +++ b/src/arch/arm/isa/formats/aarch64.isa @@ -436,6 +436,9 @@ namespace Aarch64 // SP return new MsrImm64( machInst, MISCREG_SPSEL, crm); + case 0x1b: + // SVE SVCR - SMSTART/SMSTOP + return decodeSmeMgmt(machInst); case 0x1e: // DAIFSet return new MsrImmDAIFSet64( @@ -3073,20 +3076,30 @@ def format Aarch64() {{ using namespace Aarch64; if (bits(machInst, 27) == 0x0) { if (bits(machInst, 28) == 0x0) { - if (bits(machInst, 26, 25) != 0x2) { - return new Unknown64(machInst); - } - if (bits(machInst, 31) == 0x0) { - switch (bits(machInst, 30, 29)) { - case 0x0: - case 0x1: - case 0x2: - return decodeSveInt(machInst); - case 0x3: - return decodeSveFp(machInst); + if (bits(machInst, 26) == 0x1) { + if (bits(machInst, 31) == 0x0) { + if (bits(machInst, 25) == 0x1) { + return new Unknown64(machInst); + } + switch (bits(machInst, 30, 29)) { + case 0x0: + case 0x1: + case 0x2: + return decodeSveInt(machInst); + case 0x3: + return decodeSveFp(machInst); + } + } else { + return decodeSveMem(machInst); } } else { - return decodeSveMem(machInst); + if ((bits(machInst, 25) == 0x0) && \ + (bits(machInst, 31) == 0x1)) { + // bit 31:25=1xx0000 + return decodeSmeInst(machInst); + } else { + return new Unknown64(machInst); + } } } else if (bits(machInst, 26) == 0) // bit 28:26=100 diff --git a/src/arch/arm/isa/formats/formats.isa b/src/arch/arm/isa/formats/formats.isa index 5ef65966af..0a1f8f8ce2 100644 --- a/src/arch/arm/isa/formats/formats.isa +++ b/src/arch/arm/isa/formats/formats.isa @@ -52,6 +52,9 @@ ##include "sve_top_level.isa" ##include "sve_2nd_level.isa" +//Include support for decoding SME instructions (AArch64-only) +##include "sme.isa" + //Include support for predicated instructions ##include "pred.isa" diff --git a/src/arch/arm/isa/formats/sme.isa b/src/arch/arm/isa/formats/sme.isa new file mode 100644 
index 0000000000..ac75d151ce --- /dev/null +++ b/src/arch/arm/isa/formats/sme.isa @@ -0,0 +1,738 @@ +// Copyright (c) 2022 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +output header {{ +namespace Aarch64 +{ + StaticInstPtr decodeSmeMgmt(ExtMachInst); + StaticInstPtr decodeSmeInst(ExtMachInst); + + StaticInstPtr decodeSmeOp32(ExtMachInst); + StaticInstPtr decodeSmeOpFp32(ExtMachInst); + StaticInstPtr decodeSmeOpBf16(ExtMachInst); + StaticInstPtr decodeSmeOpFp16(ExtMachInst); + StaticInstPtr decodeSmeOpInt8(ExtMachInst); + + StaticInstPtr decodeSmeOp64(ExtMachInst); + StaticInstPtr decodeSmeOpFp64(ExtMachInst); + StaticInstPtr decodeSmeOpInt16(ExtMachInst); + + StaticInstPtr decodeSmeMovaInsert(ExtMachInst); + StaticInstPtr decodeSmeMovaExtract(ExtMachInst); + + StaticInstPtr decodeSmeMisc(ExtMachInst); + StaticInstPtr decodeSmeZero(ExtMachInst); + + StaticInstPtr decodeSmeAddArray(ExtMachInst); + StaticInstPtr decodeSmeAddhv(ExtMachInst); + + StaticInstPtr decodeSmeMemory(ExtMachInst); + StaticInstPtr decodeSmeLoad(ExtMachInst); + StaticInstPtr decodeSmeStore(ExtMachInst); + StaticInstPtr decodeSmeLoadStoreArray(ExtMachInst); + StaticInstPtr decodeSmeLoadQuadWord(ExtMachInst); + StaticInstPtr decodeSmeStoreQuadWord(ExtMachInst); +} +}}; + +output decoder {{ +namespace Aarch64 +{ + // NOTE: This is called from a different decode tree (aarch64.isa). + // For neatness and clarity we keep the code here in order to keep all + // SME things together.
+ StaticInstPtr + decodeSmeMgmt(ExtMachInst machInst) + { + const uint8_t imm = (uint8_t)bits(machInst, 10, 8); + + if (bits(machInst, 8)) { + return new SmeSmstart(machInst, imm); + } else { + return new SmeSmstop(machInst, imm); + } + } + + StaticInstPtr + decodeSmeInst(ExtMachInst machInst) + { + // Starting point for decoding: bits 31:25=1xx0000 + + const uint8_t op0 = (uint8_t)bits(machInst, 30, 29); + const uint8_t op1 = (uint8_t)bits(machInst, 24, 19); + const uint8_t op2 = (uint8_t)bits(machInst, 17); + const uint8_t op3 = (uint8_t)bits(machInst, 4, 2); + + if ((op0 & 0b10) == 0b00) { + if ((op1 & 0b011000) == 0b010000) { + if ((op3 & 0b001) == 0b000) { + return decodeSmeOp32(machInst); + } + } + + if ((op1 & 0b011000) == 0b011000) { + if ((op3 & 0b010) == 0b000) { + return decodeSmeOp64(machInst); + } + } + } + + if (op0 == 0b10) { + if ((op1 & 0b100111) == 0b000000) { + if (op2 == 0b0) { + if ((op3 & 0b100) == 0b000) { + return decodeSmeMovaInsert(machInst); + } + } + + if (op2 ==0b1) { + return decodeSmeMovaExtract(machInst); + } + } + + if ((op1 & 0b100111) == 0b000001) { + return decodeSmeMisc(machInst); + } + + if ((op1 & 0b100111) == 0b000010) { + if ((op3 & 0b010) == 0b000) { + return decodeSmeAddArray(machInst); + } + } + } + + if (op0 == 0b11) { + return decodeSmeMemory(machInst); + } + + // We should not get here + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeOp32(ExtMachInst machInst) + { + const uint8_t op0 = (uint8_t)bits(machInst, 29); + const uint8_t op1 = (uint8_t)bits(machInst, 24); + const uint8_t op2 = (uint8_t)bits(machInst, 21); + const uint8_t op3 = (uint8_t)bits(machInst, 3); + + if (op0 == 0) { + if (op1 == 0) { + if (op2 == 0) { + if (op3 == 0) { + return decodeSmeOpFp32(machInst); + } + } + } + + if (op1 == 1) { + if (op2 == 0) { + if (op3 == 0) { + return decodeSmeOpBf16(machInst); + } + } + + if (op2 == 1) { + if (op3 == 0) { + return decodeSmeOpFp16(machInst); + } + } + } + } + + if (op0 == 1) { + if (op3 == 
0) { + return decodeSmeOpInt8(machInst); + } + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeOpFp32(ExtMachInst machInst) + { + const uint32_t S = (uint32_t)bits(machInst, 4, 4); + + const RegIndex Zm = (RegIndex)(uint32_t)(bits(machInst, 20, 16)); + const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5)); + const RegIndex Pn = (RegIndex)(uint32_t)(bits(machInst, 12, 10)); + const RegIndex Pm = (RegIndex)(uint32_t)(bits(machInst, 15, 13)); + const RegIndex ZAda = (RegIndex)(uint32_t)(bits(machInst, 1, 0)); + + if (S == 0) { + return new SmeFmopa(machInst, ZAda, Zn, + Pn, Pm, Zm); + } else { + return new SmeFmops(machInst, ZAda, Zn, + Pn, Pm, Zm); + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeOpBf16(ExtMachInst machInst) + { + // The following code is functionally correct for decode, but + // remains commented out as the current gem5 fplib implementation + // doesn't support BF16, and hence the instructions themselves + // remain unimplemented. Once these have been implemented, this code + // can be safely uncommented to enable decode for the two BF16 Outer + // Product instructions added by FEAT_SME. 
+ + // const uint32_t S = (uint32_t)bits(machInst, 4, 4); + + // const RegIndex Zm = (RegIndex)(uint32_t)( + // bits(machInst, 20, 16)); + // const RegIndex Zn = (RegIndex)(uint32_t)( + // bits(machInst, 9, 5)); + // const RegIndex Pn = (RegIndex)(uint32_t)( + // bits(machInst, 12, 10)); + // const RegIndex Pm = (RegIndex)(uint32_t)( + // bits(machInst, 15, 13)); + // const RegIndex ZAda = (RegIndex)(uint32_t)( + // bits(machInst, 1, 0)); + + // if (S == 0) { + // return new SmeBmopa(machInst); + // } else { + // return new SmeBmops(machInst); + // } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeOpFp16(ExtMachInst machInst) + { + const uint32_t S = (uint32_t)bits(machInst, 4, 4); + + const RegIndex Zm = (RegIndex)(uint32_t)(bits(machInst, 20, 16)); + const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5)); + const RegIndex Pn = (RegIndex)(uint32_t)(bits(machInst, 12, 10)); + const RegIndex Pm = (RegIndex)(uint32_t)(bits(machInst, 15, 13)); + const RegIndex ZAda = (RegIndex)(uint32_t)(bits(machInst, 1, 0)); + + if (S == 0) { + return new SmeFmopaWidening(machInst, ZAda, Zn, + Pn, Pm, Zm); + } else { + return new SmeFmopsWidening(machInst, ZAda, Zn, + Pn, Pm, Zm); + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeOpInt8(ExtMachInst machInst) + { + const uint32_t u0 = (uint32_t)bits(machInst, 24); + const uint32_t u1 = (uint32_t)bits(machInst, 21); + const uint32_t S = (uint32_t)bits(machInst, 4); + + const RegIndex Zm = (RegIndex)(uint32_t)(bits(machInst, 20, 16)); + const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5)); + const RegIndex Pn = (RegIndex)(uint32_t)(bits(machInst, 12, 10)); + const RegIndex Pm = (RegIndex)(uint32_t)(bits(machInst, 15, 13)); + const RegIndex ZAda = (RegIndex)(uint32_t)(bits(machInst, 1, 0)); + + if (u0 == 0) { + if (u1 == 0) { + if (S == 0) { + return new SmeSmopa( + machInst, ZAda, Zn, Pn, Pm, Zm); + } else { + return new SmeSmops( + machInst, ZAda, Zn, Pn, Pm, Zm); + } + } 
else { + if (S == 0) { + return new SmeSumopa( + machInst, ZAda, Zn, Pn, Pm, Zm); + } else { + return new SmeSumops( + machInst, ZAda, Zn, Pn, Pm, Zm); + } + } + } else { + if (u1 == 0) { + if (S == 0) { + return new SmeUsmopa( + machInst, ZAda, Zn, Pn, Pm, Zm); + } else { + return new SmeUsmops( + machInst, ZAda, Zn, Pn, Pm, Zm); + } + } else { + if (S == 0) { + return new SmeUmopa( + machInst, ZAda, Zn, Pn, Pm, Zm); + } else { + return new SmeUmops( + machInst, ZAda, Zn, Pn, Pm, Zm); + } + } + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeOp64(ExtMachInst machInst) + { + const uint8_t op0 = (uint8_t)bits(machInst, 29); + const uint8_t op1 = (uint8_t)bits(machInst, 24); + const uint8_t op2 = (uint8_t)bits(machInst, 21); + + if (op0 == 0) { + if (op1 == 0) { + if (op2 == 0) { + return decodeSmeOpFp64(machInst); + } + } + } + + if (op0 == 1) { + return decodeSmeOpInt16(machInst); + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeOpFp64(ExtMachInst machInst) + { + const uint32_t S = (uint32_t)bits(machInst, 4, 4); + + const RegIndex Zm = (RegIndex)(uint32_t)(bits(machInst, 20, 16)); + const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5)); + const RegIndex Pn = (RegIndex)(uint32_t)(bits(machInst, 12, 10)); + const RegIndex Pm = (RegIndex)(uint32_t)(bits(machInst, 15, 13)); + const RegIndex ZAda = (RegIndex)(uint32_t)(bits(machInst, 2, 0)); + + if (S == 0) { + return new SmeFmopa(machInst, ZAda, Zn, + Pn, Pm, Zm); + } else { + return new SmeFmops(machInst, ZAda, Zn, + Pn, Pm, Zm); + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeOpInt16(ExtMachInst machInst) + { + const uint32_t u0 = (uint32_t)bits(machInst, 24); + const uint32_t u1 = (uint32_t)bits(machInst, 21); + const uint32_t S = (uint32_t)bits(machInst, 4); + + const RegIndex Zm = (RegIndex)(uint32_t)(bits(machInst, 20, 16)); + const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5)); + const RegIndex Pn = 
(RegIndex)(uint32_t)(bits(machInst, 12, 10)); + const RegIndex Pm = (RegIndex)(uint32_t)(bits(machInst, 15, 13)); + const RegIndex ZAda = (RegIndex)(uint32_t)(bits(machInst, 2, 0)); + + if (u0 == 0) { + if (u1 == 0) { + if (S == 0) { + return new SmeSmopa( + machInst, ZAda, Zn, Pn, Pm, Zm); + } else { + return new SmeSmops( + machInst, ZAda, Zn, Pn, Pm, Zm); + } + } else { + if (S == 0) { + return new SmeSumopa( + machInst, ZAda, Zn, Pn, Pm, Zm); + } else { + return new SmeSumops( + machInst, ZAda, Zn, Pn, Pm, Zm); + } + } + } else { + if (u1 == 0) { + if (S == 0) { + return new SmeUsmopa( + machInst, ZAda, Zn, Pn, Pm, Zm); + } else { + return new SmeUsmops( + machInst, ZAda, Zn, Pn, Pm, Zm); + } + } else { + if (S == 0) { + return new SmeUmopa( + machInst, ZAda, Zn, Pn, Pm, Zm); + } else { + return new SmeUmops( + machInst, ZAda, Zn, Pn, Pm, Zm); + } + } + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeMovaInsert(ExtMachInst machInst) + { + const uint8_t op0 = (uint8_t)bits(machInst, 18); + + if (op0 == 1) { + return new Unknown64(machInst); + } + + const uint32_t size = (uint32_t)bits(machInst, 23, 22); + const uint32_t Q = (uint32_t)bits(machInst, 16, 16); + + const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5)); + const RegIndex Ws = (RegIndex)(uint32_t)( + bits(machInst, 14, 13) + 12); + const RegIndex Pg = (RegIndex)(uint32_t)(bits(machInst, 12, 10)); + const RegIndex ZAd_imm = (RegIndex)(uint32_t)( + bits(machInst, 3, 0)); + const bool V = (bool)bits(machInst, 15); + + if (Q == 0) { + switch (size) { + case 0b00: + return new SmeMovaInsert(machInst, ZAd_imm, + Zn, Pg, Ws, V); + case 0b01: + return new SmeMovaInsert(machInst, ZAd_imm, + Zn, Pg, Ws, V); + case 0b10: + return new SmeMovaInsert(machInst, ZAd_imm, + Zn, Pg, Ws, V); + case 0b11: + return new SmeMovaInsert(machInst, ZAd_imm, + Zn, Pg, Ws, V); + default: + break; + } + } + + if ((Q == 1) && (size == 0b11)) { + return new SmeMovaInsert<__uint128_t>(machInst, ZAd_imm, + 
Zn, Pg, Ws, V); + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeMovaExtract(ExtMachInst machInst) + { + const uint8_t op0 = (uint8_t)bits(machInst, 18); + const uint8_t op1 = (uint8_t)bits(machInst, 9); + + if ((op0 == 1) || (op1 == 1)) { + return new Unknown64(machInst); + } + + const uint32_t size = (uint32_t)bits(machInst, 23, 22); + const uint32_t Q = (uint32_t)bits(machInst, 16, 16); + + const RegIndex Zd = (RegIndex)(uint32_t)(bits(machInst, 4, 0)); + const RegIndex Ws = (RegIndex)(uint32_t)( + bits(machInst, 14, 13) + 12); + const RegIndex Pg = (RegIndex)(uint32_t)(bits(machInst, 12, 10)); + const RegIndex ZAn_imm = (RegIndex)(uint32_t)( + bits(machInst, 8, 5)); + const bool V = (bool)bits(machInst, 15); + + if (Q == 0) { + switch (size) { + case 0b00: + return new SmeMovaExtract(machInst, Zd, + ZAn_imm, Pg, Ws, V); + case 0b01: + return new SmeMovaExtract(machInst, Zd, + ZAn_imm, Pg, Ws, V); + case 0b10: + return new SmeMovaExtract(machInst, Zd, + ZAn_imm, Pg, Ws, V); + case 0b11: + return new SmeMovaExtract(machInst, Zd, + ZAn_imm, Pg, Ws, V); + default: + break; + } + } + + if ((Q == 1) && (size == 0b11)) { + return new SmeMovaExtract<__uint128_t>(machInst, Zd, + ZAn_imm, Pg, Ws, V); + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeMisc(ExtMachInst machInst) + { + const uint32_t op0 = (uint32_t)bits(machInst, 23, 22); + const uint32_t op1 = (uint32_t)bits(machInst, 18, 8); + + if (op0 == 0b00) { + if (op1 == 0b00000000000) { + return decodeSmeZero(machInst); + } + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeZero(ExtMachInst machInst) + { + const uint8_t imm8 = (uint8_t)bits(machInst, 7, 0); + + return new SmeZero(machInst, imm8); + } + + StaticInstPtr + decodeSmeAddArray(ExtMachInst machInst) + { + const uint32_t op0 = (uint32_t)bits(machInst, 23); + const uint32_t op1 = (uint32_t)bits(machInst, 18, 17); + const uint32_t op2 = (uint32_t)bits(machInst, 4); + + if (op0 == 1) { + if 
(op1 == 0b00) { + if (op2 == 0) { + return decodeSmeAddhv(machInst); + } + } + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeAddhv(ExtMachInst machInst) + { + const uint32_t V = (uint32_t)bits(machInst, 16, 16); + const uint32_t op = (uint32_t)bits(machInst, 22, 22); + const uint32_t op2 = (uint32_t)bits(machInst, 2, 0); + + const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5)); + const RegIndex Pn = (RegIndex)(uint32_t)(bits(machInst, 12, 10)); + const RegIndex Pm = (RegIndex)(uint32_t)(bits(machInst, 15, 13)); + const RegIndex ZAda = (RegIndex)(uint32_t)(bits(machInst, 2, 0)); + + if (op == 0) { // 32-bit + if (V == 0) { + if ((op2 & 0b100) == 0b000) { + return new SmeAddha(machInst, ZAda, Zn, Pn, Pm); + } + } else { + if ((op2 & 0b100) == 0b000) { + return new SmeAddva(machInst, ZAda, Zn, Pn, Pm); + } + } + } else { + if (V == 0) { + return new SmeAddha(machInst, ZAda, Zn, Pn, Pm); + } else { + return new SmeAddva(machInst, ZAda, Zn, Pn, Pm); + } + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeMemory(ExtMachInst machInst) + { + const uint8_t op0 = (uint8_t)bits(machInst, 24, 21); + const uint8_t op1 = (uint8_t)bits(machInst, 20, 15); + const uint8_t op2 = (uint8_t)bits(machInst, 12, 10); + const uint8_t op3 = (uint8_t)bits(machInst, 4); + + if ((op0 & 0b1001) == 0b0000) { + if (op3 == 0b0) { + return decodeSmeLoad(machInst); + } + } + + if ((op0 & 0b1001) == 0b0001) { + if (op3 == 0b0) { + return decodeSmeStore(machInst); + } + } + + if ((op0 & 0b1110) == 0b1000) { + if (op1 == 0b000000) { + if (op2 == 0b000) { + if (op3 == 0b0) { + return decodeSmeLoadStoreArray(machInst); + } + } + } + } + + if (op0 == 0b1110) { + if (op3 == 0b0) { + return decodeSmeLoadQuadWord(machInst); + } + } + + if (op0 == 0b1111) { + if (op3 == 0b0) { + return decodeSmeStoreQuadWord(machInst); + } + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeLoad(ExtMachInst machInst) + { + const uint8_t msz = 
(uint8_t)bits(machInst, 23, 22); + const bool V = (bool)bits(machInst, 15); + + const RegIndex Rn = makeSP( + (RegIndex)(uint32_t)bits(machInst, 9, 5)); + const RegIndex Rm = (RegIndex)(uint32_t)(bits(machInst, 20, 16)); + const RegIndex Rs = (RegIndex)(uint32_t)( + bits(machInst, 14, 13) + 12); + const uint32_t ZAt_imm = (uint32_t)bits(machInst, 3, 0); + const RegIndex Pg = (RegIndex)(uint32_t)(bits(machInst, 12, 10)); + + switch(msz) + { + case 0b00: + return new SmeLd1b(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V); + case 0b01: + return new SmeLd1h(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V); + case 0b10: + return new SmeLd1w(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V); + case 0b11: + return new SmeLd1d(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V); + default: + break; + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeStore(ExtMachInst machInst) + { + const uint8_t msz = (uint8_t)bits(machInst, 23, 22); + const bool V = (bool)bits(machInst, 15); + + const RegIndex Rn = makeSP( + (RegIndex)(uint32_t)bits(machInst, 9, 5)); + const RegIndex Rm = (RegIndex)(uint32_t)(bits(machInst, 20, 16)); + const RegIndex Rs = (RegIndex)(uint32_t)( + bits(machInst, 14, 13) + 12); + const uint32_t ZAt_imm = (uint32_t)bits(machInst, 3, 0); + const RegIndex Pg = (RegIndex)(uint32_t)(bits(machInst, 12, 10)); + + switch(msz) + { + case 0b00: + return new SmeSt1b(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V); + case 0b01: + return new SmeSt1h(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V); + case 0b10: + return new SmeSt1w(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V); + case 0b11: + return new SmeSt1d(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V); + default: + break; + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeLoadStoreArray(ExtMachInst machInst) + { + const uint8_t op = (uint8_t)bits(machInst, 21); + + const RegIndex Rn = makeSP( + (RegIndex)(uint32_t)bits(machInst, 9, 5)); + const RegIndex Rv = (RegIndex)(uint32_t)( + bits(machInst, 14, 13) + 12); + const uint32_t imm4 = 
(uint32_t)bits(machInst, 3, 0); + + if (op == 0) { + return new SmeLdr(machInst, imm4, Rn, Rv); + } else { + return new SmeStr(machInst, imm4, Rn, Rv); + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeSmeLoadQuadWord(ExtMachInst machInst) + { + const bool V = (bool)bits(machInst, 15); + + const RegIndex Rn = makeSP( + (RegIndex)(uint32_t)bits(machInst, 9, 5)); + const RegIndex Rm = (RegIndex)(uint32_t)(bits(machInst, 20, 16)); + const RegIndex Rs = (RegIndex)(uint32_t)( + bits(machInst, 14, 13) + 12); + const uint32_t ZAt = (uint32_t)bits(machInst, 3, 0); + const RegIndex Pg = (RegIndex)(uint32_t)(bits(machInst, 12, 10)); + + return new SmeLd1q<__uint128_t>(machInst, ZAt, Rn, Pg, Rs, Rm, V); + } + + StaticInstPtr + decodeSmeStoreQuadWord(ExtMachInst machInst) + { + const bool V = (bool)bits(machInst, 15); + + const RegIndex Rn = makeSP( + (RegIndex)(uint32_t)bits(machInst, 9, 5)); + const RegIndex Rm = (RegIndex)(uint32_t)(bits(machInst, 20, 16)); + const RegIndex Rs = (RegIndex)(uint32_t)( + bits(machInst, 14, 13) + 12); + const uint32_t ZAt = (uint32_t)bits(machInst, 3, 0); + const RegIndex Pg = (RegIndex)(uint32_t)(bits(machInst, 12, 10)); + + return new SmeSt1q<__uint128_t>(machInst, ZAt, Rn, Pg, Rs, Rm, V); + } +} +}}; diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa index cbd5466b82..2ee3817445 100644 --- a/src/arch/arm/isa/formats/sve_2nd_level.isa +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -605,22 +605,43 @@ namespace Aarch64 { uint8_t b23_22 = bits(machInst, 23, 22); uint8_t b11 = bits(machInst, 11); - if ((b23_22 & 0x2) == 0x0 && b11 == 0x0) { - RegIndex rd = makeSP( - (RegIndex) (uint8_t) bits(machInst, 4, 0)); - RegIndex rn = makeSP( - (RegIndex) (uint8_t) bits(machInst, 20, 16)); - uint64_t imm = sext<6>(bits(machInst, 10, 5)); - if ((b23_22 & 0x1) == 0x0) { - return new AddvlXImm(machInst, rd, rn, imm); - } else { - return new AddplXImm(machInst, rd, rn, imm); + if (b11 
== 0x0) { + if ((b23_22 & 0x2) == 0x0) { + RegIndex rd = makeSP( + (RegIndex) (uint8_t) bits(machInst, 4, 0)); + RegIndex rn = makeSP( + (RegIndex) (uint8_t) bits(machInst, 20, 16)); + uint64_t imm = sext<6>(bits(machInst, 10, 5)); + if ((b23_22 & 0x1) == 0x0) { + return new AddvlXImm(machInst, rd, rn, imm); + } else { + return new AddplXImm(machInst, rd, rn, imm); + } + } else if (b23_22 == 0x2) { + RegIndex rd = (RegIndex) (uint8_t) bits(machInst, 4, 0); + uint64_t imm = sext<6>(bits(machInst, 10, 5)); + if (bits(machInst, 20, 16) == 0x1f) { + return new SveRdvl(machInst, rd, imm); + } } - } else if (b23_22 == 0x2 && b11 == 0x0) { - RegIndex rd = (RegIndex) (uint8_t) bits(machInst, 4, 0); - uint64_t imm = sext<6>(bits(machInst, 10, 5)); - if (bits(machInst, 20, 16) == 0x1f) { - return new SveRdvl(machInst, rd, imm); + } else { // b11 == 1 + if ((b23_22 & 0x2) == 0x0) { + RegIndex rd = makeSP( + (RegIndex) (uint8_t) bits(machInst, 4, 0)); + RegIndex rn = makeSP( + (RegIndex) (uint8_t) bits(machInst, 20, 16)); + uint64_t imm = sext<6>(bits(machInst, 10, 5)); + if ((b23_22 & 0x1) == 0x0) { + return new SmeAddsvl(machInst, rd, rn, imm); + } else { + return new SmeAddspl(machInst, rd, rn, imm); + } + } else if (b23_22 == 0x2) { + RegIndex rd = (RegIndex) (uint8_t) bits(machInst, 4, 0); + uint64_t imm = sext<6>(bits(machInst, 10, 5)); + if (bits(machInst, 20, 16) == 0x1f) { + return new SmeRdsvl(machInst, rd, imm); + } } } return new Unknown64(machInst); @@ -1201,6 +1222,18 @@ namespace Aarch64 zdn, zm, pg); } break; + case 0xE: + if(!b13) { + unsigned size = (unsigned) bits(machInst, 23, 22); + RegIndex pg = (RegIndex)(uint8_t) bits(machInst, 12, 10); + RegIndex zn = (RegIndex)(uint8_t) bits(machInst, 9, 5); + RegIndex zd = (RegIndex)(uint8_t) bits(machInst, 4, 0); + + if (size == 0b00) { + return new SveRevd<__uint128_t>(machInst, zd, zn, pg); + } + } + break; } switch (bits(machInst, 20, 17)) { case 0x0: @@ -1951,6 +1984,36 @@ namespace Aarch64 return new 
Unknown64(machInst); } // decodeSveIntCmpSca + StaticInstPtr + decodeSvePsel(ExtMachInst machInst) + { + RegIndex Pd = (RegIndex)(uint8_t)bits(machInst, 3, 0); + RegIndex Pn = (RegIndex)(uint8_t)bits(machInst, 8, 5); + RegIndex Pg = (RegIndex)(uint8_t)bits(machInst, 13, 10); + RegIndex Rm = (RegIndex)(0b01100 + + (uint8_t)bits(machInst, 17, 16)); + uint8_t imm = (uint8_t)bits(machInst, 20, 18); + imm += (uint8_t)bits(machInst, 23, 22) << 3; + + const uint8_t size = imm & 0xF; + + if (size == 0) { + return new Unknown64(machInst); + } + + if (size & 0b0001) { + return new SvePsel(machInst, Pd, Pn, Pg, Rm, imm >> 1); + } else if (size & 0b0010) { + return new SvePsel(machInst, Pd, Pn, Pg, Rm, imm >> 2); + } else if (size & 0b0100) { + return new SvePsel(machInst, Pd, Pn, Pg, Rm, imm >> 3); + } else if (size & 0b1000) { + return new SvePsel(machInst, Pd, Pn, Pg, Rm, imm >> 4); + } + + return new Unknown64(machInst); + } // decodeSvePsel + StaticInstPtr decodeSveIntWideImmUnpred0(ExtMachInst machInst) { @@ -2106,6 +2169,48 @@ namespace Aarch64 return new Unknown64(machInst); } // decodeSveIntWideImmUnpred + StaticInstPtr + decodeSveClamp(ExtMachInst machInst) + { + RegIndex zda = (RegIndex)(uint8_t)bits(machInst, 4, 0); + RegIndex zn = (RegIndex)(uint8_t)bits(machInst, 9, 5); + RegIndex zm = (RegIndex)(uint8_t)bits(machInst, 20, 16); + + switch(bits(machInst, 10)) { + case 0: + switch(bits(machInst, 23, 22)) { + case 0x0: + return new SveSclamp(machInst, zm, zn, zda); + case 0x1: + return new SveSclamp(machInst, zm, zn, zda); + case 0x2: + return new SveSclamp(machInst, zm, zn, zda); + case 0x3: + return new SveSclamp(machInst, zm, zn, zda); + default: + break; + } + break; + case 1: + switch(bits(machInst, 23, 22)) { + case 0x0: + return new SveUclamp(machInst, zm, zn, zda); + case 0x1: + return new SveUclamp(machInst, zm, zn, zda); + case 0x2: + return new SveUclamp(machInst, zm, zn, zda); + case 0x3: + return new SveUclamp(machInst, zm, zn, zda); + default: + break; 
+ } + default: + break; + } + + return new Unknown64(machInst); + } + StaticInstPtr decodeSveMultiplyAddUnpred(ExtMachInst machInst) { diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa index 803029a2a4..155ec1c42f 100644 --- a/src/arch/arm/isa/formats/sve_top_level.isa +++ b/src/arch/arm/isa/formats/sve_top_level.isa @@ -66,7 +66,9 @@ namespace Aarch64 StaticInstPtr decodeSvePredGen(ExtMachInst machInst); StaticInstPtr decodeSvePredCount(ExtMachInst machInst); StaticInstPtr decodeSveIntCmpSca(ExtMachInst machInst); + StaticInstPtr decodeSvePsel(ExtMachInst machInst); StaticInstPtr decodeSveIntWideImmUnpred(ExtMachInst machInst); + StaticInstPtr decodeSveClamp(ExtMachInst machInst); StaticInstPtr decodeSveMultiplyAddUnpred(ExtMachInst machInst); StaticInstPtr decodeSveMultiplyIndexed(ExtMachInst machInst); @@ -107,6 +109,9 @@ namespace Aarch64 case 0x0: { if (bits(machInst, 14)) { + if (bits(machInst, 15, 11) == 0b11000) { + return decodeSveClamp(machInst); + } return decodeSveIntMulAdd(machInst); } else { uint8_t b_15_13 = (bits(machInst, 15) << 1) | @@ -210,10 +215,14 @@ namespace Aarch64 case 0x7: { uint8_t b_15_14 = bits(machInst, 15, 14); + uint8_t b_4 = bits(machInst, 4, 4); switch (b_15_14) { case 0x0: return decodeSveIntCmpSca(machInst); case 0x1: + if (b_4 == 0) { + return decodeSvePsel(machInst); + } return new Unknown64(machInst); case 0x2: return decodeSvePredCount(machInst); diff --git a/src/arch/arm/isa/includes.isa b/src/arch/arm/isa/includes.isa index 386af4e05d..e2534a6728 100644 --- a/src/arch/arm/isa/includes.isa +++ b/src/arch/arm/isa/includes.isa @@ -61,6 +61,7 @@ output header {{ #include "arch/arm/insts/neon64_mem.hh" #include "arch/arm/insts/pred_inst.hh" #include "arch/arm/insts/pseudo.hh" +#include "arch/arm/insts/sme.hh" #include "arch/arm/insts/static_inst.hh" #include "arch/arm/insts/sve.hh" #include "arch/arm/insts/sve_mem.hh" diff --git a/src/arch/arm/isa/insts/insts.isa 
b/src/arch/arm/isa/insts/insts.isa index 0697ca49d2..cdc162f2b5 100644 --- a/src/arch/arm/isa/insts/insts.isa +++ b/src/arch/arm/isa/insts/insts.isa @@ -105,6 +105,9 @@ split decoder; ##include "sve.isa" ##include "sve_mem.isa" +//SME +##include "sme.isa" + //m5 Pseudo-ops ##include "m5ops.isa" diff --git a/src/arch/arm/isa/insts/sme.isa b/src/arch/arm/isa/insts/sme.isa new file mode 100644 index 0000000000..b9f6115432 --- /dev/null +++ b/src/arch/arm/isa/insts/sme.isa @@ -0,0 +1,821 @@ +// Copyright (c) 2022 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// @file Definition of SME instructions. + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + def smeAddInst(name, Name, opClass, types, op): + global header_output, decoder_output, exec_output + code = smEnCheckCode + smeZaWrite + ''' + // imm stores the tile index + // op1 is the source SVE vector register + // gp1 is the row predicate register + // gp2 is the column predicate register + + unsigned eCount = ArmStaticInst::getCurSmeVecLen( + xc->tcBase()); + + uint8_t tile_index = imm & 0x7; + + // View the tile as the correct data type, extract the sub-tile + auto tile = getTile(ZA, tile_index); + ''' + code += op + + iop = InstObjParams(name, "Sme" + Name, "SmeAddOp", + {'code': code, 'op_class': opClass}, + ['IsNonSpeculative']) + header_output += SmeAddDeclare.subst(iop) + exec_output += SmeTemplatedExecute.subst(iop) + + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sme' + Name} + exec_output += SmeOpExecDeclare.subst(substDict) + + def smeAddVlInst(name, Name, opClass, op): + global header_output, decoder_output, exec_output + code = smEnCheckCodeNoPstate + ''' + // dest is the 64-bit destination register + // op1 is the 
64-bit source register + // imm is a signed multiplier + ''' + code += op + + iop = InstObjParams(name, "Sme" + Name, "SmeAddVlOp", + {'code': code, 'op_class': opClass}, + ['IsNonSpeculative']) + header_output += SmeAddVlDeclare.subst(iop) + exec_output += SmeExecute.subst(iop) + + def smeLd1xInst(name, Name, opClass, types): + global header_output, decoder_output, exec_output + code = smEnCheckCode + smeZaWrite + ''' + // imm stores the tile number as well as the vector offset. The + // size of the fields changes based on the data type being used. + // XOp1 stores Rn + // GpOp stores the governing predicate register + // WOp2 stores Rs - the vector index register + // XOp3 stores Rm - the offset register (applied to Rn) + + + unsigned eCount = ArmStaticInst::getCurSmeVecLen( + xc->tcBase()); + + uint8_t offset = imm & (0xf >> (findMsbSet(sizeof(TPElem)))); + M5_VAR_USED uint8_t tile_idx = + imm >> (4 - findMsbSet(sizeof(TPElem))); + M5_VAR_USED uint8_t vec_idx = (WOp2 + offset) % eCount; + + // Calculate the address + M5_VAR_USED Addr EA = XOp1 + XOp3 * sizeof(TPElem); + + // Calculate the read predicate. One boolean per byte, + // initialised to all true. + auto rdEn = std::vector(eCount * sizeof(TPElem), true); + for (int i = 0; i < eCount; ++i) { + if (GpOp_x[i]) { + continue; + } + + // Mark each byte of the corresponding elem as false + for (int j = 0; j < sizeof(TPElem); ++j) { + rdEn[i * sizeof(TPElem) + j] = false; + } + } + ''' + + zaWriteCode = ''' + // Here we write the data we just got from memory to the tile: + if (V) { + auto col = getTileVSlice(ZA, tile_idx, vec_idx); + for(int i = 0; i < eCount; ++i) { + col[i] = GpOp_x[i] ? data[i] : 0; + } + } else { + auto row = getTileHSlice(ZA, tile_idx, vec_idx); + for(int i = 0; i < eCount; ++i) { + row[i] = GpOp_x[i] ? 
data[i] : 0; + } + } + ''' + + iop = InstObjParams(name, "Sme" + Name, "SmeLd1xSt1xOp", + {'code': code, 'za_write': zaWriteCode, + 'op_class': opClass}, ['IsLoad', + 'IsNonSpeculative']) + header_output += SmeLd1xDeclare.subst(iop) + exec_output += SmeLd1xExecute.subst(iop) + exec_output += SmeLd1xInitiateAcc.subst(iop) + exec_output += SmeLd1xCompleteAcc.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sme' + Name} + exec_output += SmeLd1xExecDeclare.subst(substDict) + + def smeLdrInst(name, Name, opClass): + global header_output, decoder_output, exec_output + code = smEnCheckCodeNoSM + smeZaWrite + ''' + // imm stores the vector offset. We do not have a tile number as + // we target the whole accumulator array. + // imm also stores the offset applied to the base memory access + // register. + // Op1 stores Rn, which is the base memory access register + // Op2 stores Rv, which is the vector select register + + + unsigned eCount = ArmStaticInst::getCurSmeVecLen( + xc->tcBase()); + + M5_VAR_USED uint8_t vec_index = (WOp2 + imm) % eCount; + + // Calculate the address + M5_VAR_USED Addr EA = XOp1 + imm; + ''' + + iop = InstObjParams(name, "Sme" + Name, "SmeLdrStrOp", + {'code': code, 'op_class': opClass}, + ['IsLoad', 'IsNonSpeculative']) + header_output += SmeLdrDeclare.subst(iop) + exec_output += SmeLdrExecute.subst(iop) + exec_output += SmeLdrInitiateAcc.subst(iop) + exec_output += SmeLdrCompleteAcc.subst(iop) + + def smeMovaExtractInst(name, Name, opClass, types): + global header_output, decoder_output, exec_output + code = smEnCheckCode + ''' + // imm stores the tile index + // op1 is the source SVE vector register + // gp is the governing predicate register + // op2 is the slice index register + // v is the row/col select immediate - true for column accesses + + unsigned eCount = ArmStaticInst::getCurSmeVecLen( + xc->tcBase()); + + uint8_t offset = imm & (0xf >> (findMsbSet(sizeof(TPElem)))); + uint8_t tile_idx = imm >> (4 - 
findMsbSet(sizeof(TPElem))); + + uint32_t vec_idx = (WOp2 + offset) % eCount; + + if (!v) { // Horizontal (row) access + auto row = getTileHSlice(ZA, tile_idx, vec_idx); + for (int i = 0; i < eCount; ++i) { + if (!GpOp_x[i]) { + continue; + } + + AA64FpOp1_x[i] = row[i]; + } + } else { // Vertical (column) access + auto col = getTileVSlice(ZA, tile_idx, vec_idx); + for (int i = 0; i < eCount; ++i) { + if (!GpOp_x[i]) { + continue; + } + + AA64FpOp1_x[i] = col[i]; + } + } + ''' + + iop = InstObjParams(name, "Sme" + Name, "SmeMovExtractOp", + {'code': code, 'op_class': opClass}, + ['IsNonSpeculative']) + header_output += SmeMovaExtractDeclare.subst(iop) + exec_output += SmeTemplatedExecute.subst(iop) + + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sme' + Name} + exec_output += SmeOpExecDeclare.subst(substDict) + + def smeMovaInsertInst(name, Name, opClass, types): + global header_output, decoder_output, exec_output + code = smEnCheckCode + smeZaWrite + ''' + // imm stores the tile index + // op1 is the source SVE vector register + // gp is the governing predicate register + // op2 is the slice index register + // v is the row/col select immediate - true for column accesses + + unsigned eCount = ArmStaticInst::getCurSmeVecLen( + xc->tcBase()); + + uint8_t offset = imm & (0xf >> (findMsbSet(sizeof(TPElem)))); + uint8_t tile_idx = imm >> (4 - findMsbSet(sizeof(TPElem))); + + uint32_t vec_idx = (WOp2 + offset) % eCount; + + if (!v) { // Horizontal (row) access + auto row = getTileHSlice(ZA, tile_idx, vec_idx); + for (int i = 0; i < eCount; ++i) { + if (!GpOp_x[i]) { + continue; + } + + row[i] = AA64FpOp1_x[i]; + } + } else { // Vertical (column) access + auto col = getTileVSlice(ZA, tile_idx, vec_idx); + for (int i = 0; i < eCount; ++i) { + if (!GpOp_x[i]) { + continue; + } + + col[i] = AA64FpOp1_x[i]; + } + } + ''' + + iop = InstObjParams(name, "Sme" + Name, "SmeMovInsertOp", + {'code': code, 'op_class': opClass}, + ['IsNonSpeculative']) + 
header_output += SmeMovaInsertDeclare.subst(iop) + exec_output += SmeTemplatedExecute.subst(iop) + + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sme' + Name} + exec_output += SmeOpExecDeclare.subst(substDict) + + def smeMsrInst(name, Name, opClass, op): + global header_output, decoder_output, exec_output + code = ''' + if (FullSystem) { + fault = this->checkSmeAccess(xc->tcBase(), Cpsr, Cpacr64); + if (fault != NoFault) { + return fault; + } + } + ''' + op + + iop = InstObjParams(name, "Sme" + Name, "ImmOp64", + {'code': code, 'op_class': opClass}, + ['IsNonSpeculative', 'IsSerializeAfter']) + header_output += SMEMgmtDeclare.subst(iop) + exec_output += SmeExecute.subst(iop) + + def smeFPOPInst(name, Name, opClass, srcTypes, dstTypes, op): + global header_output, decoder_output, exec_output + code = smEnCheckCode + smeZaWrite + ''' + // imm stores the tile index + // op1 is the first SVE vector register + // gp1 is the predicate register corresponding to the first + // SVE vector register + // gp2 is the predicate register corresponding to the second + // SVE vector register + // op2 is the second SVE vector register + + unsigned eCount = ArmStaticInst::getCurSmeVecLen( + xc->tcBase()); + ''' + code += op + + iop = InstObjParams(name, "Sme" + Name, "SmeOPOp", + {'code': code, 'op_class': opClass}, + ['IsNonSpeculative']) + header_output += SmeFPOPDeclare.subst(iop) + exec_output += SmeDualTemplatedExecute.subst(iop) + for src, dst in zip(srcTypes, dstTypes): + substDict = {'targs' : "{}, {}".format(src, dst), + 'class_name' : 'Sme' + Name} + exec_output += SmeOpExecDeclare.subst(substDict) + + def smeIntOPInst(name, Name, opClass, src1Types, src2Types, dstTypes, op): + global header_output, decoder_output, exec_output + code = smEnCheckCode + smeZaWrite + ''' + // imm stores the tile index + // op1 is the first SVE vector register + // gp1 is the predicate register corresponding to the first + // SVE vector register + // gp2 is the predicate 
register corresponding to the second + // SVE vector register + // op2 is the second SVE vector register + + unsigned eCount = ArmStaticInst::getCurSmeVecLen( + xc->tcBase()); + ''' + code += op + + iop = InstObjParams(name, "Sme" + Name, "SmeOPOp", + {'code': code, 'op_class': opClass}, + ['IsNonSpeculative']) + header_output += SmeIntOPDeclare.subst(iop) + exec_output += SmeTripleTemplatedExecute.subst(iop) + for src1, src2, dst in zip(src1Types, src2Types, dstTypes): + substDict = {'targs' : "{}, {}, {}".format(src1, src2, dst), + 'class_name' : 'Sme' + Name} + exec_output += SmeOpExecDeclare.subst(substDict) + + def smeRdsvlInst(name, Name, opClass): + global header_output, decoder_output, exec_output + code = smEnCheckCodeNoPstate + ''' + // dest is the 64-bit destination register + // imm is a signed multiplier + + unsigned eCount = ArmStaticInst::getCurSmeVecLen( + xc->tcBase()); + + Dest64 = eCount * imm; + ''' + + iop = InstObjParams(name, "Sme" + Name, "SmeRdsvlOp", + {'code': code, 'op_class': opClass}, + ['IsNonSpeculative']) + header_output += SmeRdsvlDeclare.subst(iop) + exec_output += SmeExecute.subst(iop) + + def smeSt1xInst(name, Name, opClass, types): + global header_output, decoder_output, exec_output + code = smEnCheckCode + ''' + // imm stores the tile number as well as the vector offset. The + // size of the fields changes based on the data type being used. + // XOp1 stores Rn + // GpOp stores the governing predicate register + // WOp2 stores Rs - the vector index register + // XOp3 stores Rm - the offset register (applied to Rn) + + + unsigned eCount = ArmStaticInst::getCurSmeVecLen( + xc->tcBase()); + + uint8_t offset = imm & (0xf >> (findMsbSet(sizeof(TPElem)))); + M5_VAR_USED uint8_t tile_idx = + imm >> (4 - findMsbSet(sizeof(TPElem))); + M5_VAR_USED uint8_t vec_idx = (WOp2 + offset) % eCount; + + // Calculate the address + M5_VAR_USED Addr EA = XOp1 + XOp3 * sizeof(TPElem); + + // Calculate the write predicate. 
One boolean per byte, + // initialised to all true. + auto wrEn = std::vector(eCount * sizeof(TPElem), true); + for (int i = 0; i < eCount; ++i) { + if (GpOp_x[i]) { + continue; + } + + // Mark each byte of the corresponding elem as false + for (int j = 0; j < sizeof(TPElem); ++j) { + wrEn[i * sizeof(TPElem) + j] = false; + } + } + + // Extract the data to be stored from the tile. We don't worry + // about the predicate here as that's already handled by wrEn. + TPElem data[MaxSmeVecLenInBytes / sizeof(TPElem)]; + if(V) { + auto col = getTileVSlice(ZA, tile_idx, vec_idx); + for (int i = 0; i < eCount; ++i) { + data[i] = col[i]; + } + } else { + auto row = getTileHSlice(ZA, tile_idx, vec_idx); + for (int i = 0; i < eCount; ++i) { + data[i] = row[i]; + } + } + ''' + + iop = InstObjParams(name, "Sme" + Name, "SmeLd1xSt1xOp", + {'code': code, 'op_class': opClass}, + ['IsStore', 'IsNonSpeculative']) + header_output += SmeSt1xDeclare.subst(iop) + exec_output += SmeSt1xExecute.subst(iop) + exec_output += SmeSt1xInitiateAcc.subst(iop) + exec_output += SmeSt1xCompleteAcc.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sme' + Name} + exec_output += SmeSt1xExecDeclare.subst(substDict) + + def smeStrInst(name, Name, opClass): + global header_output, decoder_output, exec_output + code = smEnCheckCodeNoSM + ''' + // imm stores the vector offset. We do not have a tile number + // as we target the whole accumulator array. + // imm also stores the offset applied to the base memory access + // register. 
+ // Op1 stores Rn, which is the base memory access register + // Op2 stores Rv, which is the vector select register + + + unsigned eCount = ArmStaticInst::getCurSmeVecLen( + xc->tcBase()); + + uint8_t vec_index = (WOp2 + imm) % eCount; + + auto row = getTileHSlice(ZA, 0, vec_index); + + // Calculate the address + M5_VAR_USED Addr EA = XOp1 + imm; + + uint8_t data[MaxSmeVecLenInBytes]; + + // Update data which will then be used to store the row to memory + for (int i = 0; i < eCount; ++i) { + data[i] = row[i]; + } + ''' + + iop = InstObjParams(name, "Sme" + Name, "SmeLdrStrOp", + {'code': code, 'op_class': opClass}, + ['IsStore', 'IsNonSpeculative']) + header_output += SmeStrDeclare.subst(iop) + exec_output += SmeStrExecute.subst(iop) + exec_output += SmeStrInitiateAcc.subst(iop) + exec_output += SmeStrCompleteAcc.subst(iop) + + def smeZeroInst(name, Name, opClass, types): + global header_output, decoder_output, exec_output + code = smEnCheckCodeNoSM + smeZaWrite + ''' + // When zeroing tiles, we use 64-bit elements. This means + // that we have up to eight subtiles to clear in the ZA tile. 
+ + ZA = ZA; + + for (int i = 0; i < 8; ++i) { + if (((imm >> i) & 0x1) == 0x1) { + getTile(ZA, i).zero(); + } + }''' + + iop = InstObjParams(name, "Sme" + Name, "SmeZeroOp", + {'code': code, 'op_class': opClass}, + ['IsNonSpeculative']) + header_output += SmeZeroDeclare.subst(iop) + exec_output += SmeTemplatedExecute.subst(iop) + + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sme' + Name} + exec_output += SmeOpExecDeclare.subst(substDict) + + # ADDHA + addCode = ''' + for (int col = 0; col < eCount; ++col) { + TPElem val = AA64FpOp1_x[col]; + + for (int row = 0; row < eCount; ++row) { + if (!(GpOp1_x[row] && GpOp2_x[col])) { + continue; + } + + tile[col][row] += val; + } + } + ''' + smeAddInst('addha', "Addha", "SimdAddOp", ['int32_t', 'int64_t'], addCode) + + # ADDSPL + addSplCode = ''' + Dest64 = imm * ArmStaticInst::getCurSmeVecLen(xc->tcBase()); + // Divide down to get the predicate length in bytes + Dest64 /= 8; + Dest64 += XOp1; + ''' + smeAddVlInst('addspl', "Addspl", "SimdAddOp", addSplCode) + + # ADDSVL + addSvlCode = ''' + Dest64 = imm * ArmStaticInst::getCurSmeVecLen(xc->tcBase()); + Dest64 += XOp1; + ''' + smeAddVlInst('addsvl', "Addsvl", "SimdAddOp", addSvlCode) + + # ADDVA + addCode = ''' + for (int row = 0; row < eCount; ++row) { + TPElem val = AA64FpOp1_x[row]; + + for (int col = 0; col < eCount; ++col) { + if (!(GpOp1_x[row] && GpOp2_x[col])) { + continue; + } + + tile[col][row] += val; + } + } + ''' + smeAddInst('addva', "Addva", "SimdAddOp", ['int32_t', 'int64_t'], addCode) + + # BFMOPA + # BFMOPS + + # FMOPA (non-widening) + fmopxCode = ''' + auto tile = getTile(ZA, imm); + FPSCR fpscr = (FPSCR) Fpscr; + + for (int j = 0; j < eCount; ++j) { + if (!GpOp1_xd[j]) { + continue; + } + + TPDElem val1 = AA64FpOp1_xd[j]; + + for (int i = 0; i < eCount; ++i) { + if (!GpOp2_xd[i]) { + continue; + } + + TPDElem val2 = AA64FpOp2_xd[i]; + + #if %s + val2 = fplibNeg(val2); + #endif + + TPDElem res = fplibMul(val1, val2, fpscr); + + 
tile[j][i] = fplibAdd(tile[j][i], + res, fpscr); + } + } + ''' + smeFPOPInst('fmopa', 'Fmopa', 'MatrixOPOp', ['uint32_t', 'uint64_t'], + ['uint32_t', 'uint64_t'], fmopxCode % "0") + + # FMOPA (widening) + wideningFmopxCode = ''' + auto tile = getTile(ZA, imm); + FPSCR fpscr = (FPSCR) Fpscr; + + for (int j = 0; j < eCount; ++j) { + if (!GpOp1_xd[j]) { + continue; + } + for (int i = 0; i < eCount; ++i) { + if (!GpOp2_xd[i]) { + continue; + } + + for (int k = 0; k < 2; ++k) { + TPSElem temp1 = (AA64FpOp1_xd[j] >> (16 * k)) & 0xFFFF; + TPSElem temp2 = (AA64FpOp2_xd[i] >> (16 * k)) & 0xFFFF; + TPDElem val1 = fplibConvert(temp1, + FPCRRounding(fpscr), fpscr); + TPDElem val2 = fplibConvert(temp2, + FPCRRounding(fpscr), fpscr); + + #if %s + val2 = fplibNeg(val2); + #endif + + TPDElem res = fplibMul(val1, val2, fpscr); + tile[j][i] = fplibAdd(tile[j][i], res, fpscr); + } + } + } + ''' + smeFPOPInst('fmopa', 'FmopaWidening', 'MatrixOPOp', + ['uint16_t'], ['uint32_t'], wideningFmopxCode % "0") + + # FMOPS (non-widening) + smeFPOPInst('fmops', 'Fmops', 'MatrixOPOp', ['uint32_t', 'uint64_t'], + ['uint32_t', 'uint64_t'], fmopxCode % "1") + + # FMOPS (widening) + smeFPOPInst('fmops', 'FmopsWidening', 'MatrixOPOp', + ['uint16_t'], ['uint32_t'], wideningFmopxCode % "1") + + # LD1B + smeLd1xInst('ld1b', 'Ld1b', 'MemReadOp', ['uint8_t']) + + # LD1D + smeLd1xInst('ld1d', 'Ld1d', 'MemReadOp', ['uint64_t']) + + # LD1H + smeLd1xInst('ld1h', 'Ld1h', 'MemReadOp', ['uint16_t']) + + # LD1Q + smeLd1xInst('ld1q', 'Ld1q', 'MemReadOp', ['__uint128_t']) + + # LD1W + smeLd1xInst('ld1w', 'Ld1w', 'MemReadOp', ['uint32_t']) + + # LDR + smeLdrInst("ldr", "Ldr", 'MemReadOp') + + # MOV (tile to vector) - ALIAS; see MOVA + # MOV (vector to tile) - ALIAS; see MOVA + # MOVA (tile to vector) + smeMovaExtractInst("mova", "MovaExtract", 'MatrixMovOp', + ["uint8_t", "uint16_t", "uint32_t", "uint64_t", + "__uint128_t"]) + + # MOVA (vector to tile) + smeMovaInsertInst("mova", "MovaInsert", 'MatrixMovOp', +
["uint8_t", "uint16_t", "uint32_t", "uint64_t", + "__uint128_t"]) + + # RDSVL + smeRdsvlInst('rdsvl', 'Rdsvl', 'SimdAddOp') + + # SMOPA + intMopxCode = ''' + auto tile = getTile(ZA, imm); + + size_t shift = 8 * sizeof(TPS1Elem); + size_t mask = (1 << shift) - 1; + + for (int j = 0; j < eCount; ++j) { + for (int i = 0; i < eCount; ++i) { + for (int k = 0; k < 4; ++k) { + if (!GpOp1_xs1[4 * j + k]) { + continue; + } + + if (!GpOp2_xs2[4 * i + k]) { + continue; + } + + TPS1Elem temp1 = + (TPS1Elem)(AA64FpOp1_xd[j] >> (shift * k)) & mask; + TPS2Elem temp2 = + (TPS2Elem)(AA64FpOp2_xd[i] >> (shift * k)) & mask; + + tile[j][i] %s= (TPDElem)temp1 * (TPDElem)temp2; + } + } + } + ''' + smeIntOPInst('smopa', 'Smopa', 'MatrixOPOp', ['int8_t', 'int16_t'], + ['int8_t', 'int16_t'], ['int32_t', 'int64_t'], + intMopxCode % "+") + + # SMOPS + smeIntOPInst('smops', 'Smops', 'MatrixOPOp', ['int8_t', 'int16_t'], + ['int8_t', 'int16_t'], ['int32_t', 'int64_t'], + intMopxCode % "-") + + # SMSTART + smstartSmstopCode = ''' + // Bit 0 of imm determines if we are setting or clearing + // (smstart vs smstop) + // Bit 1 means that we are applying this to SM + // Bit 2 means that we are applying this to ZA + bool new_state = imm & 0x1; + bool sm_affected = imm & 0x2; + bool za_affected = imm & 0x4; + bool old_sm_state = Svcr & 0x1; + bool old_za_state = Svcr & 0x2; + + bool sm_changed = sm_affected && old_sm_state != new_state; + bool za_changed = za_affected && old_za_state != new_state; + + if (sm_changed) { + // We need to zero the SVE Z, P, FFR registers on SM change. Also, + // set FPSR to a default value. Note that we use the max SVE len + // instead of the actual vector length. + // + // For the Z, P registers we are directly setting these to zero + // without going through the ISA parser (which generates the + // dependencies) as otherwise the O3 CPU can deadlock when there + // are too few free physical registers. 
We therefore rely on this + // instruction being a barrier (IsSerialiseAfter). + + // Z Registers, including special and interleave registers + ArmISA::VecRegContainer zeroed_z_reg; + zeroed_z_reg.zero(); + + for (int reg_idx = 0; reg_idx < NumVecRegs; ++reg_idx) { + auto reg_id = ArmISA::vecRegClass[reg_idx]; + xc->tcBase()->setReg(reg_id, &zeroed_z_reg); + } + + // P Registers, including the FFR + ArmISA::VecPredRegContainer zeroed_p_reg; + zeroed_p_reg.reset(); + + for (int reg_idx = 0; reg_idx < NumVecPredRegs; ++reg_idx) { + auto reg_id = ArmISA::vecPredRegClass[reg_idx]; + xc->tcBase()->setReg(reg_id, &zeroed_p_reg); + } + + // FPSR + Fpsr = 0x0800009f; + } + + if (za_changed) { + // ZA write + ZA = ZA; + ZA.zero(); + } + + // Now that we've handled the zeroing of the appropriate registers, + // we update the pstate accordingly. + + if (sm_changed) { + if (new_state == 1) { + Svcr = Svcr | 0x1; // Set SM + } else { + Svcr = Svcr & ~(uint64_t)0x1; // Clear SM + } + } + + if (za_changed) { + if (new_state == 1) { + Svcr = Svcr | 0x2; // Set ZA + } else { + Svcr = Svcr & ~(uint64_t)0x2; // Clear ZA + } + } + ''' + + smeMsrInst('smstart', 'Smstart', 'IntAluOp', + smstartSmstopCode) + + # SMSTOP + smeMsrInst('smstop', 'Smstop', 'IntAluOp', + smstartSmstopCode) + + # ST1B + smeSt1xInst('st1b', 'St1b', 'MemWriteOp', ['uint8_t']) + + # ST1D + smeSt1xInst('st1d', 'St1d', 'MemWriteOp', ['uint64_t']) + + # ST1H + smeSt1xInst('st1h', 'St1h', 'MemWriteOp', ['uint16_t']) + + # ST1Q + smeSt1xInst('st1q', 'St1q', 'MemWriteOp', ['__uint128_t']) + + # ST1W + smeSt1xInst('st1w', 'St1w', 'MemWriteOp', ['uint32_t']) + + # STR + smeStrInst("str", "Str", "MemWriteOp") + + # SUMOPA + smeIntOPInst('sumopa', 'Sumopa', 'MatrixOPOp', ['int8_t', 'int16_t'], + ['uint8_t', 'uint16_t'], ['int32_t', 'int64_t'], + intMopxCode % "+") + + # SUMOPS + smeIntOPInst('sumops', 'Sumops', 'MatrixOPOp', ['int8_t', 'int16_t'], + ['uint8_t', 'uint16_t'], ['int32_t', 'int64_t'], + intMopxCode % "-") + + # 
UMOPA + smeIntOPInst('umopa', 'Umopa', 'MatrixOPOp', ['uint8_t', 'uint16_t'], + ['uint8_t', 'uint16_t'], ['int32_t', 'int64_t'], + intMopxCode % "+") + + # UMOPS + smeIntOPInst('umops', 'Umops', 'MatrixOPOp', ['uint8_t', 'uint16_t'], + ['uint8_t', 'uint16_t'], ['int32_t', 'int64_t'], + intMopxCode % "-") + + # USMOPA + smeIntOPInst('usmopa', 'Usmopa', 'MatrixOPOp', ['uint8_t', 'uint16_t'], + ['int8_t', 'int16_t'], ['int32_t', 'int64_t'], + intMopxCode % "+") + + # USMOPS + smeIntOPInst('usmops', 'Usmops', 'MatrixOPOp', ['uint8_t', 'uint16_t'], + ['int8_t', 'int16_t'], ['int32_t', 'int64_t'], + intMopxCode % "-") + + # ZERO + smeZeroInst("zero", "Zero", "MatrixOp", ["uint64_t"]) + +}}; diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index 7cb733100f..97d4ec7e56 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -1310,6 +1310,34 @@ let {{ substDict = {'targs' : type, 'class_name' : 'Sve' + Name} exec_output += SveOpExecDeclare.subst(substDict); + # Generates definition for SVE psel predicate selection instructions + def svePselInst(name, Name, opClass, types): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen( + xc->tcBase()); + + uint8_t index = ((uint32_t)Op2 + imm) % eCount; + + bool copy = POp1_x[index]; + if (copy) { + for (int i = 0; i < eCount; ++i) { + PDest_x[i] = GpOp_x[i]; + } + } else { + for (int i = 0; i < eCount; ++i) { + PDest_x[i] = false; + } + } + ''' + iop = ArmInstObjParams(name, 'Sve' + Name, 'SvePselOp', + {'code': code, 'op_class': opClass}, []) + header_output += SvePselOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict); + # Generate definition for SVE compare & terminate instructions def sveCompTermInst(name, Name, opClass, types, op): global header_output, 
exec_output, decoders @@ -3096,6 +3124,31 @@ let {{ 'class_name' : 'Sve' + Name} exec_output += SveOpExecDeclare.subst(substDict) + # Generate definitions for clamp to min/max instructions + def sveClampInst(name, Name, opClass, types, + decoder = 'Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen( + xc->tcBase()); + + for (int i = 0 ; i < eCount ; ++i) { + if (AA64FpDestMerge_x[i] < AA64FpOp2_x[i]) { + AA64FpDest_x[i] = AA64FpOp2_x[i]; + } else if (AA64FpDestMerge_x[i] > AA64FpOp1_x[i]) { + AA64FpDest_x[i] = AA64FpOp1_x[i]; + } + } + ''' + iop = ArmInstObjParams(name, 'Sve' + Name, 'SveClampOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveClampOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + fpTypes = ('uint16_t', 'uint32_t', 'uint64_t') signedTypes = ('int8_t', 'int16_t', 'int32_t', 'int64_t') unsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t') @@ -4071,6 +4124,8 @@ let {{ svePFirstInst('pfirst', 'Pfirst', 'SimdPredAluOp') # PNEXT svePNextInst('pnext', 'Pnext', 'SimdPredAluOp', unsignedTypes) + # PSEL + svePselInst('psel', 'Psel', 'SimdPredAluOp', unsignedTypes) # PTEST svePredTestInst('ptest', 'Ptest', 'SimdPredAluOp') # PTRUE @@ -4140,6 +4195,10 @@ let {{ ['uint16_t', 'uint32_t', 'uint64_t'], revCode % {'revtype' : 'uint8_t'}, predType=PredType.MERGE, srcRegType=SrcRegType.Vector, decoder='Generic') + # REVD + sveUnaryInst('revd', 'Revd', 'SimdAluOp', ['__uint128_t'], + revCode % {'revtype' : 'uint64_t'}, predType=PredType.MERGE, + srcRegType=SrcRegType.Vector, decoder='Generic') # REVH sveUnaryInst('revh', 'Revh', 'SimdAluOp', ['uint32_t', 'uint64_t'], revCode % {'revtype' : 'uint16_t'}, predType=PredType.MERGE, @@ -4160,6 +4219,8 @@ let {{ sveWideningAssocReducInst('saddv', 'Saddv', 
'SimdReduceAddOp', ['int8_t, int64_t', 'int16_t, int64_t', 'int32_t, int64_t'], addvCode, '0') + # SCLAMP + sveClampInst('sclamp', 'Sclamp', 'SimdAluOp', signedTypes) # SCVTF scvtfCode = fpOp % ('fplibFixedToFP(' 'sext(srcElem1), 0,' @@ -4545,6 +4606,8 @@ let {{ ['uint8_t, uint64_t', 'uint16_t, uint64_t', 'uint32_t, uint64_t', 'uint64_t, uint64_t'], addvCode, '0') + # UCLAMP + sveClampInst('uclamp', 'Uclamp', 'SimdAluOp', unsignedTypes) # UCVTF ucvtfCode = fpOp % ('fplibFixedToFP(srcElem1, 0, true,' ' FPCRRounding(fpscr), fpscr)') diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index 5919ae974e..24a0af9155 100644 --- a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -57,6 +57,8 @@ def operand_types {{ # For operations that are implemented as a template 'x' : 'TPElem', 'xs' : 'TPSElem', + 'xs1' : 'TPS1Elem', + 'xs2' : 'TPS2Elem', 'xd' : 'TPDElem', 'pc' : 'ArmISA::VecPredRegContainer', 'pb' : 'uint8_t' @@ -451,6 +453,8 @@ def operands {{ # Predicate register operands 'GpOp': VecPredReg('gp'), + 'GpOp1': VecPredReg('gp1'), + 'GpOp2': VecPredReg('gp2'), 'POp1': VecPredReg('op1'), 'POp2': VecPredReg('op2'), 'PDest': VecPredReg('dest'), @@ -496,6 +500,7 @@ def operands {{ 'LLSCLock': CntrlRegNC('MISCREG_LOCKFLAG'), 'Dczid' : CntrlRegNC('MISCREG_DCZID_EL0'), 'PendingDvm': CntrlRegNC('MISCREG_TLBINEEDSYNC'), + 'Svcr' : CntrlReg('MISCREG_SVCR'), #Register fields for microops 'URa' : IntReg('ura'), diff --git a/src/arch/arm/isa/templates/sme.isa b/src/arch/arm/isa/templates/sme.isa new file mode 100644 index 0000000000..1bec2a3a71 --- /dev/null +++ b/src/arch/arm/isa/templates/sme.isa @@ -0,0 +1,773 @@ +// Copyright (c) 2022 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the 
functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// @file Definition of SME instruction templates. + +let {{ + # All SME instructions should be checking if Streaming Mode is + # enabled in the PSTATE. 
The following call checks both the SME and + # the FP enable flags in the relevant registers depending on the + # current EL. + smEnCheckCodeNoPstate = ''' + if (FullSystem) { + fault = this->checkSmeEnabled(xc->tcBase(), Cpsr, Cpacr64); + if (fault != NoFault) { + return fault; + } + } + ''' + + smPreamble = ''' + CPSR cpsr = (CPSR) Cpsr; + ExceptionLevel target_el = (ExceptionLevel) (uint8_t) cpsr.el; + if (target_el == EL0) { + target_el = EL1; + } + ''' + + smCheckCode = ''' + // Check streaming mode first + if ((Svcr & 1) != 0b1) { + fault = smeAccessTrap(target_el, 0b10); + return fault; + } + ''' + + zaCheckCode = ''' + // Check if ZA is enabled + if ((Svcr & 2) >> 1 != 0b1) { + fault = smeAccessTrap(target_el, 0b11); + return fault; + } + ''' + + # If streaming mode is disabled or ZA is disabled we trap + smEnCheckCode = smPreamble + smCheckCode + zaCheckCode + \ + smEnCheckCodeNoPstate + + # If ZA is disabled we trap + smEnCheckCodeNoSM = smPreamble + zaCheckCode + smEnCheckCodeNoPstate + + # If streaming mode is disabled we trap + smEnCheckCodeNoZA = smPreamble + smCheckCode + smEnCheckCodeNoPstate + + smeZaWrite = ''' + // Force the ISA parser to see the access to ZA as a write, + // not a read. + ZA = ZA; + ''' +}}; + +def template SmeAddDeclare {{ + template + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. + %(class_name)s(ExtMachInst machInst, uint64_t imm, + RegIndex op1, RegIndex gp1, + RegIndex gp2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + imm, op1, gp1, gp2) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + }; +}}; + +def template SmeAddVlDeclare {{ + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst, + RegIndex dest, RegIndex op1, + int8_t imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + dest, op1, imm) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + }; +}}; + +def template SmeLd1xDeclare {{ + template + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. + %(class_name)s(ExtMachInst machInst, uint64_t imm, + RegIndex op1, RegIndex mpop1, + RegIndex op2, RegIndex op3, + bool V) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + imm, op1, mpop1, op2, op3, V) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + }; +}}; + +def template SmeLd1xExecute {{ + template + Fault %(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + Request::Flags flags = 0; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + // We need a buffer in which to store the data: + TPElem data[MaxSmeVecLenInBytes / sizeof(TPElem)]; + + if (fault == NoFault) { + // The size of the access is controlled by the type of data, and + // the number of elements. 
+ fault = xc->readMem(EA, (uint8_t*)data, eCount * sizeof(TPElem), + flags, rdEn); + } + + if (fault == NoFault) { + %(za_write)s + + // Write back the changes to the actual tile + %(op_wb)s; + } + + return fault; + } +}}; + +def template SmeLd1xInitiateAcc {{ + template + Fault %(class_name)s::initiateAcc(ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + Request::Flags flags = 0; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + if (fault == NoFault) { + fault = xc->initiateMemRead(EA, eCount * sizeof(TPElem), + flags, rdEn); + } + + return fault; + } +}}; + +def template SmeLd1xCompleteAcc {{ + template + Fault %(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + // The O3 CPU will call this with a NULL-pointer if the access was + // disabled. Just return. + if (pkt == NULL) { + return fault; + } + + if (fault == NoFault) { + // We need a buffer in which to store the data: + TPElem data[MaxSmeVecLenInBytes / sizeof(TPElem)]; + + // The size for the amount of data returned here should + // have been set in initiateAcc. + memcpy((uint8_t*)data, pkt->getPtr(), pkt->getSize()); + + %(za_write)s + + // Write back the changes to the tile + %(op_wb)s; + } + return fault; + } +}}; + +def template SmeLd1xExecDeclare {{ + template + Fault %(class_name)s<%(targs)s>::execute( + ExecContext *, trace::InstRecord *) const; + template + Fault %(class_name)s<%(targs)s>::initiateAcc( + ExecContext *, trace::InstRecord *) const; + template + Fault %(class_name)s<%(targs)s>::completeAcc( + PacketPtr, ExecContext *, trace::InstRecord *) const; +}}; + +def template SmeLdrDeclare {{ + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst, uint64_t imm, + RegIndex op1, RegIndex op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + imm, op1, op2) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + }; +}}; + +def template SmeLdrExecute {{ + Fault %(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + Request::Flags flags = 0; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + auto rdEn = std::vector(eCount, true); + + // We need a buffer in which to store the data: + uint8_t data[MaxSmeVecLenInBytes]; + + if (fault == NoFault) { + fault = xc->readMem(EA, (uint8_t*)data, eCount, flags, rdEn); + } + + if (fault == NoFault) { + auto row = getTileHSlice(ZA, 0, vec_index); + for (int i = 0; i < eCount; ++i) { + row[i] = data[i]; + } + + %(op_wb)s; + } + + return fault; + } +}}; + +def template SmeLdrInitiateAcc {{ + Fault %(class_name)s::initiateAcc(ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + Request::Flags flags = 0; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + auto rdEn = std::vector(eCount, true); + + if (fault == NoFault) { + fault = xc->initiateMemRead(EA, eCount, flags, rdEn); + } + + return fault; + } +}}; + +def template SmeLdrCompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + // The O3 CPU will call this with a NULL-pointer if the access was + // disabled. Just return. 
+ if (pkt == NULL) { + return fault; + } + + if (fault == NoFault) { + // Get the data out of the packet + auto row = getTileHSlice(ZA, 0, vec_index); + for (int i = 0; i < eCount; ++i) { + row[i] = pkt->getPtr()[i]; + } + + %(op_wb)s; + } + + return fault; + } +}}; + +def template SMEMgmtDeclare {{ + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. + %(class_name)s(ExtMachInst machInst, uint64_t imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, imm) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + }; +}}; + +def template SmeMovaExtractDeclare {{ + template + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. + %(class_name)s(ExtMachInst machInst, RegIndex op1, + uint8_t imm, RegIndex gp, + RegIndex op2, bool v) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + op1, imm, gp, op2, v) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + }; +}}; + +def template SmeMovaInsertDeclare {{ + template + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. + %(class_name)s(ExtMachInst machInst, uint8_t imm, + RegIndex op1, RegIndex gp, + RegIndex op2, bool v) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + imm, op1, gp, op2, v) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + }; +}}; + +def template SmeFPOPDeclare {{ + template + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst, uint64_t imm, + RegIndex op1, RegIndex gp1, + RegIndex gp2, RegIndex op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + imm, op1, gp1, gp2, op2) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + }; +}}; + +def template SmeIntOPDeclare {{ + template + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. + %(class_name)s(ExtMachInst machInst, uint64_t imm, + RegIndex op1, RegIndex gp1, + RegIndex gp2, RegIndex op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + imm, op1, gp1, gp2, op2) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + }; +}}; + +def template SmeRdsvlDeclare {{ + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. + %(class_name)s(ExtMachInst machInst, + RegIndex dest, int8_t imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + dest, imm) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + }; +}}; + +def template SmeSt1xDeclare {{ + template + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst, uint64_t imm, + RegIndex op1, RegIndex mpop1, + RegIndex op2, RegIndex op3, bool V) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + imm, op1, mpop1, op2, op3, V) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + }; +}}; + +def template SmeSt1xExecute {{ + template + Fault %(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + Request::Flags flags = 0; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + if (fault == NoFault) { + fault = xc->writeMem((uint8_t*)data, eCount * sizeof(TPElem), EA, + flags, NULL, wrEn); + } + + return fault; + } +}}; + +def template SmeSt1xInitiateAcc {{ + template + Fault %(class_name)s::initiateAcc(ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + Request::Flags flags = 0; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + if (fault == NoFault) { + fault = xc->writeMem((uint8_t*)data, eCount * sizeof(TPElem), EA, + flags, NULL, wrEn); + } + + return fault; + } +}}; + +def template SmeSt1xCompleteAcc {{ + template + Fault %(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + trace::InstRecord *traceData) const + { + return NoFault; + } +}}; + +def template SmeStrDeclare {{ + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst, uint64_t imm, + RegIndex op1, RegIndex op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + imm, op1, op2) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + }; +}}; + +def template SmeStrExecute {{ + Fault %(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + Request::Flags flags = 0; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + + if (fault == NoFault) { + auto wrEn = std::vector(eCount, true); + fault = xc->writeMem((uint8_t*)data, eCount, EA, + flags, NULL, wrEn); + } + + return fault; + } +}}; + +def template SmeStrInitiateAcc {{ + Fault %(class_name)s::initiateAcc(ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + Request::Flags flags = 0; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + if (fault == NoFault) { + auto wrEn = std::vector(eCount, true); + fault = xc->writeMem((uint8_t*)data, eCount, EA, + flags, NULL, wrEn); + } + + return fault; + } +}}; + +def template SmeStrCompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + trace::InstRecord *traceData) const + { + // TODO-SME: Can this fail? + return NoFault; + } +}}; + +def template SmeSt1xExecDeclare {{ + template + Fault %(class_name)s<%(targs)s>::execute( + ExecContext *, trace::InstRecord *) const; + template + Fault %(class_name)s<%(targs)s>::initiateAcc( + ExecContext *, trace::InstRecord *) const; + template + Fault %(class_name)s<%(targs)s>::completeAcc( + PacketPtr, ExecContext *, trace::InstRecord *) const; +}}; + +def template SmeZeroDeclare {{ + template + class %(class_name)s : public %(base_class)s + { + private: + %(reg_idx_arr_decl)s; + + public: + /// Constructor. 
+ %(class_name)s(ExtMachInst machInst, uint8_t imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, imm) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + }; +}}; + +def template SmeExecute {{ + Fault + %(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template SmeTemplatedExecute {{ + template + Fault + %(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template SmeDualTemplatedExecute {{ + template + Fault + %(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template SmeTripleTemplatedExecute {{ + template + Fault + %(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template SmeOpExecDeclare {{ + template + Fault %(class_name)s<%(targs)s>::execute( + ExecContext *, trace::InstRecord *) const; +}}; diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa index fc38a2b979..9260441c2c 100644 --- a/src/arch/arm/isa/templates/sve.isa +++ b/src/arch/arm/isa/templates/sve.isa @@ -800,6 +800,33 @@ class %(class_name)s : public %(base_class)s }; }}; +def template SvePselOpDeclare {{ +template +class %(class_name)s : public %(base_class)s +{ + private: + %(reg_idx_arr_decl)s; + + protected: + typedef _Element Element; + typedef _Element TPElem; + + public: + 
%(class_name)s(ExtMachInst machInst, + RegIndex dest, RegIndex op1, + RegIndex gp, RegIndex op2, + uint64_t imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + dest, op1, gp, op2, imm) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; +}; +}}; + def template SveCompTermOpDeclare {{ template class %(class_name)s : public %(base_class)s @@ -1170,6 +1197,32 @@ class %(class_name)s : public %(base_class)s }; }}; +def template SveClampOpDeclare {{ +template +class %(class_name)s : public %(base_class)s +{ + private: + %(reg_idx_arr_decl)s; + + protected: + typedef _Element Element; + typedef _Element TPElem; + + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + RegIndex dest, RegIndex op1, RegIndex op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + dest, op1, op2) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; +}; +}}; + def template SveWideningOpExecute {{ template Fault diff --git a/src/arch/arm/isa/templates/templates.isa b/src/arch/arm/isa/templates/templates.isa index 0b4abfcce4..047cd1ef79 100644 --- a/src/arch/arm/isa/templates/templates.isa +++ b/src/arch/arm/isa/templates/templates.isa @@ -82,3 +82,6 @@ //Templates for SVE instructions ##include "sve.isa" ##include "sve_mem.isa" + +//Templates for SME instructions +##include "sme.isa" From b860e2039baa3bd14386b8d294dca3cc763e106b Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 3 Aug 2022 17:21:48 +0100 Subject: [PATCH 151/492] system-arm: Enable SME in the bootloader In addition to SVE (which was already being enabled by the bootloader) we also enable SME to allow lower ELs to use it. 
Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289 Change-Id: I7078a80e9a857c7cf91e3c1e52fe3812fa422394 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64341 Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Jason Lowe-Power --- system/arm/bootloader/arm64/boot.S | 1 + 1 file changed, 1 insertion(+) diff --git a/system/arm/bootloader/arm64/boot.S b/system/arm/bootloader/arm64/boot.S index 50415615d2..4f26953318 100644 --- a/system/arm/bootloader/arm64/boot.S +++ b/system/arm/bootloader/arm64/boot.S @@ -64,6 +64,7 @@ _start: msr scr_el3, x0 mov x0, #(1 << 8) // Disable SVE trap to EL3 + orr x0, x0, #(1 << 12) // Disable SME trap to EL3 msr cptr_el3, x0 // Disable copro. traps to EL3 /* From 0bce2e56d9faa3e88554487233c60f5fef98d0c4 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Thu, 15 Dec 2022 13:11:44 -0800 Subject: [PATCH 152/492] dev: Ignore MC146818 UIP bit / Fix x86 Linux 5.11+ boot As of Linux 5.11, the MC146818 code was changed to avoid reading garbage data that may occur if there is a read while the registers are being updated: github.com/torvalds/linux/commit/05a0302c35481e9b47fb90ba40922b0a4cae40d8 Previously toggling this bit was fine as Linux would check twice. It now checks before and after reading time information, causing it to retry infinitely until eventually Linux bootup fails due to watchdog timeout. This changeset always sets update in progress to false. Since this is a simulation, the updates probably will not be occurring at the same time a read is occurring.
Change-Id: If0f440de9f9a6bc5a773fc935d1d5af5b98a9a4b Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66731 Reviewed-by: Matt Sinclair Tested-by: kokoro Maintainer: Bobby Bruce Reviewed-by: Jason Lowe-Power --- src/dev/mc146818.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dev/mc146818.cc b/src/dev/mc146818.cc index 919efb06f7..2bfe877b95 100644 --- a/src/dev/mc146818.cc +++ b/src/dev/mc146818.cc @@ -233,8 +233,9 @@ MC146818::readData(uint8_t addr) else { switch (addr) { case RTC_STAT_REGA: - // toggle UIP bit for linux - stat_regA.uip = !stat_regA.uip; + // Linux after v5.10 checks this multiple times so toggling + // leads to a deadlock on bootup. + stat_regA.uip = 0; return stat_regA; break; case RTC_STAT_REGB: From 5a1414d78233357d361d47aa3a74c16eb240a28c Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 14 Jan 2023 10:16:09 -0300 Subject: [PATCH 153/492] arch: Remove a couple of deprecated namespaces These namespaces have gone through the deprecation period and can now be removed: X86Macroops, SMBios, RomLabels, DeliveryMode, ConditionTests. Change-Id: I6ff5e98319d92e27743a9fbeeab054497a2392e0 Signed-off-by: Daniel R. 
Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67375 Tested-by: kokoro Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power --- src/arch/x86/bios/smbios.hh | 1 - src/arch/x86/fs_workload.hh | 1 - src/arch/x86/insts/microop.hh | 1 - src/arch/x86/intmessage.hh | 1 - src/arch/x86/isa/macroop.isa | 1 - src/arch/x86/isa/rom.isa | 4 +--- 6 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/arch/x86/bios/smbios.hh b/src/arch/x86/bios/smbios.hh index dc38676445..88d3344cbb 100644 --- a/src/arch/x86/bios/smbios.hh +++ b/src/arch/x86/bios/smbios.hh @@ -61,7 +61,6 @@ struct X86SMBiosSMBiosTableParams; namespace X86ISA { -GEM5_DEPRECATED_NAMESPACE(SMBios, smbios); namespace smbios { diff --git a/src/arch/x86/fs_workload.hh b/src/arch/x86/fs_workload.hh index 5c1187cda4..9d14f91bb5 100644 --- a/src/arch/x86/fs_workload.hh +++ b/src/arch/x86/fs_workload.hh @@ -55,7 +55,6 @@ namespace gem5 namespace X86ISA { -GEM5_DEPRECATED_NAMESPACE(SMBios, smbios); namespace smbios { diff --git a/src/arch/x86/insts/microop.hh b/src/arch/x86/insts/microop.hh index 9cbdec87d1..384e15e7e4 100644 --- a/src/arch/x86/insts/microop.hh +++ b/src/arch/x86/insts/microop.hh @@ -48,7 +48,6 @@ namespace gem5 namespace X86ISA { -GEM5_DEPRECATED_NAMESPACE(ConditionTests, condition_tests); namespace condition_tests { diff --git a/src/arch/x86/intmessage.hh b/src/arch/x86/intmessage.hh index f7692e25a3..71e4765c76 100644 --- a/src/arch/x86/intmessage.hh +++ b/src/arch/x86/intmessage.hh @@ -52,7 +52,6 @@ namespace X86ISA Bitfield<21> trigger; EndBitUnion(TriggerIntMessage) - GEM5_DEPRECATED_NAMESPACE(DeliveryMode, delivery_mode); namespace delivery_mode { enum IntDeliveryMode diff --git a/src/arch/x86/isa/macroop.isa b/src/arch/x86/isa/macroop.isa index 691e8d011c..d1b9e22032 100644 --- a/src/arch/x86/isa/macroop.isa +++ b/src/arch/x86/isa/macroop.isa @@ -76,7 +76,6 @@ output header {{ // Basic instruction class declaration template. 
def template MacroDeclare {{ - GEM5_DEPRECATED_NAMESPACE(X86Macroop, x86_macroop); namespace x86_macroop { /** diff --git a/src/arch/x86/isa/rom.isa b/src/arch/x86/isa/rom.isa index 9aef3ba3eb..bf2f9ff8e2 100644 --- a/src/arch/x86/isa/rom.isa +++ b/src/arch/x86/isa/rom.isa @@ -42,9 +42,7 @@ let {{ class X86MicrocodeRom(Rom): def getDeclaration(self): - declareLabels = \ - "GEM5_DEPRECATED_NAMESPACE(RomLabels, rom_labels);\n" - declareLabels += "namespace rom_labels\n{\n" + declareLabels = "namespace rom_labels\n{\n" for (label, microop) in self.labels.items(): declareLabels += "const static uint64_t label_%s = %d;\n" \ % (label, microop.micropc) From 8110a422665f8a40dc639aab8db7a0fe33fc23ca Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Wed, 18 Jan 2023 08:53:02 +0000 Subject: [PATCH 154/492] arch-arm: Replace Loader with loader namespace in SME code This is fixing our nightly tests [1]. There was a merge conflict between the removal of the Loader namespace and the SME patches which were still using the old capitalized version [1]: https://jenkins.gem5.org/job/nightly/491/ Change-Id: I9f709b2fff252ed6fcc76cc984592e713ab53766 Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67333 Reviewed-by: Daniel Carvalho Maintainer: Daniel Carvalho Reviewed-by: Richard Cooper Tested-by: kokoro --- src/arch/arm/insts/sme.cc | 18 +++++++++--------- src/arch/arm/insts/sme.hh | 18 +++++++++--------- src/arch/arm/insts/sve.cc | 4 ++-- src/arch/arm/insts/sve.hh | 4 ++-- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/arch/arm/insts/sme.cc b/src/arch/arm/insts/sme.cc index 305d332514..43f4579842 100644 --- a/src/arch/arm/insts/sme.cc +++ b/src/arch/arm/insts/sme.cc @@ -45,7 +45,7 @@ namespace ArmISA std::string SmeAddOp::generateDisassembly(Addr pc, - const Loader::SymbolTable *symtab) const + const loader::SymbolTable *symtab) const { std::stringstream ss; printMnemonic(ss, "", false); @@ -61,7 +61,7 @@ 
SmeAddOp::generateDisassembly(Addr pc, std::string SmeAddVlOp::generateDisassembly(Addr pc, - const Loader::SymbolTable *symtab) const + const loader::SymbolTable *symtab) const { std::stringstream ss; printMnemonic(ss, "", false); @@ -76,7 +76,7 @@ SmeAddVlOp::generateDisassembly(Addr pc, std::string SmeLd1xSt1xOp::generateDisassembly(Addr pc, - const Loader::SymbolTable *symtab) const + const loader::SymbolTable *symtab) const { std::stringstream ss; printMnemonic(ss, "", false); @@ -94,7 +94,7 @@ SmeLd1xSt1xOp::generateDisassembly(Addr pc, std::string SmeLdrStrOp::generateDisassembly(Addr pc, - const Loader::SymbolTable *symtab) const + const loader::SymbolTable *symtab) const { std::stringstream ss; printMnemonic(ss, "", false); @@ -108,7 +108,7 @@ SmeLdrStrOp::generateDisassembly(Addr pc, std::string SmeMovExtractOp::generateDisassembly(Addr pc, - const Loader::SymbolTable *symtab) const + const loader::SymbolTable *symtab) const { std::stringstream ss; printMnemonic(ss, "", false); @@ -124,7 +124,7 @@ SmeMovExtractOp::generateDisassembly(Addr pc, std::string SmeMovInsertOp::generateDisassembly(Addr pc, - const Loader::SymbolTable *symtab) const + const loader::SymbolTable *symtab) const { std::stringstream ss; printMnemonic(ss, "", false); @@ -140,7 +140,7 @@ SmeMovInsertOp::generateDisassembly(Addr pc, std::string SmeOPOp::generateDisassembly(Addr pc, - const Loader::SymbolTable *symtab) const + const loader::SymbolTable *symtab) const { std::stringstream ss; printMnemonic(ss, "", false); @@ -158,7 +158,7 @@ SmeOPOp::generateDisassembly(Addr pc, std::string SmeRdsvlOp::generateDisassembly(Addr pc, - const Loader::SymbolTable *symtab) const + const loader::SymbolTable *symtab) const { std::stringstream ss; printMnemonic(ss, "", false); @@ -171,7 +171,7 @@ SmeRdsvlOp::generateDisassembly(Addr pc, std::string SmeZeroOp::generateDisassembly(Addr pc, - const Loader::SymbolTable *symtab) const + const loader::SymbolTable *symtab) const { std::stringstream ss; 
ArmStaticInst::printMnemonic(ss, "", false); diff --git a/src/arch/arm/insts/sme.hh b/src/arch/arm/insts/sme.hh index d6cbdde5a7..198ce52f77 100644 --- a/src/arch/arm/insts/sme.hh +++ b/src/arch/arm/insts/sme.hh @@ -63,7 +63,7 @@ class SmeAddOp : public ArmStaticInst {} std::string generateDisassembly( - Addr pc, const Loader::SymbolTable *symtab) const override; + Addr pc, const loader::SymbolTable *symtab) const override; }; // Used for the SME ADDSPL/ADDSVL instructions @@ -82,7 +82,7 @@ class SmeAddVlOp : public ArmStaticInst {} std::string generateDisassembly( - Addr pc, const Loader::SymbolTable *symtab) const override; + Addr pc, const loader::SymbolTable *symtab) const override; }; // Used for SME LD1x/ST1x instrucions @@ -105,7 +105,7 @@ class SmeLd1xSt1xOp : public ArmStaticInst {} std::string generateDisassembly( - Addr pc, const Loader::SymbolTable *symtab) const override; + Addr pc, const loader::SymbolTable *symtab) const override; }; // Used for SME LDR/STR instructions @@ -124,7 +124,7 @@ class SmeLdrStrOp : public ArmStaticInst {} std::string generateDisassembly( - Addr pc, const Loader::SymbolTable *symtab) const override; + Addr pc, const loader::SymbolTable *symtab) const override; }; // Used for SME MOVA (Tile to Vector) @@ -145,7 +145,7 @@ class SmeMovExtractOp : public ArmStaticInst {} std::string generateDisassembly( - Addr pc, const Loader::SymbolTable *symtab) const override; + Addr pc, const loader::SymbolTable *symtab) const override; }; // Used for SME MOVA (Vector to Tile) @@ -166,7 +166,7 @@ class SmeMovInsertOp : public ArmStaticInst {} std::string generateDisassembly( - Addr pc, const Loader::SymbolTable *symtab) const override; + Addr pc, const loader::SymbolTable *symtab) const override; }; // Used for SME output product instructions @@ -187,7 +187,7 @@ class SmeOPOp : public ArmStaticInst {} std::string generateDisassembly( - Addr pc, const Loader::SymbolTable *symtab) const override; + Addr pc, const loader::SymbolTable *symtab) 
const override; }; // Used for the SME RDSVL instruction @@ -204,7 +204,7 @@ class SmeRdsvlOp : public ArmStaticInst {} std::string generateDisassembly( - Addr pc, const Loader::SymbolTable *symtab) const override; + Addr pc, const loader::SymbolTable *symtab) const override; }; // Used for SME ZERO @@ -220,7 +220,7 @@ class SmeZeroOp : public ArmStaticInst {} std::string generateDisassembly( - Addr pc, const Loader::SymbolTable *symtab) const override; + Addr pc, const loader::SymbolTable *symtab) const override; }; } // namespace ArmISA diff --git a/src/arch/arm/insts/sve.cc b/src/arch/arm/insts/sve.cc index 9d9c2bcb1c..546074c8fd 100644 --- a/src/arch/arm/insts/sve.cc +++ b/src/arch/arm/insts/sve.cc @@ -163,7 +163,7 @@ SveWhileOp::generateDisassembly( std::string SvePselOp::generateDisassembly(Addr pc, - const Loader::SymbolTable *symtab) const + const loader::SymbolTable *symtab) const { std::stringstream ss; printMnemonic(ss, "", false); @@ -851,7 +851,7 @@ SveComplexIdxOp::generateDisassembly( std::string SveClampOp::generateDisassembly( - Addr pc, const Loader::SymbolTable *symtab) const + Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; printMnemonic(ss, "", false); diff --git a/src/arch/arm/insts/sve.hh b/src/arch/arm/insts/sve.hh index 63a59d493a..66d82f0a3f 100644 --- a/src/arch/arm/insts/sve.hh +++ b/src/arch/arm/insts/sve.hh @@ -199,7 +199,7 @@ class SvePselOp : public ArmStaticInst {} std::string generateDisassembly( - Addr pc, const Loader::SymbolTable *symtab) const override; + Addr pc, const loader::SymbolTable *symtab) const override; }; /// Compare and terminate loop SVE instruction. 
@@ -989,7 +989,7 @@ class SveClampOp : public ArmStaticInst {} std::string generateDisassembly( - Addr pc, const Loader::SymbolTable *symtab) const override; + Addr pc, const loader::SymbolTable *symtab) const override; }; From 8d0fde19612e5e3502947a324cdf102664685e7d Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Thu, 19 Jan 2023 11:08:51 +0000 Subject: [PATCH 155/492] python: Fix deprecated decorator The deprecation message was firing during the decoration process instead of firing upon first call to the deprecated function. The message now fires only if the deprecated function is called. Change-Id: I2d510eb24884fdba0123e71e8472db68ae9d2ce4 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67334 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Reviewed-by: Daniel Carvalho Tested-by: kokoro Reviewed-by: Richard Cooper --- src/python/m5/util/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/python/m5/util/__init__.py b/src/python/m5/util/__init__.py index bc4ab4a0f5..5ae48754ab 100644 --- a/src/python/m5/util/__init__.py +++ b/src/python/m5/util/__init__.py @@ -108,8 +108,12 @@ def deprecated(replacement=None, logger=warn): message += f" Prefer {replacement} instead." logger(message) - notifyDeprecation() - return func + @wraps(func) + def wrapper(*args, **kwargs): + notifyDeprecation() + return func(*args, **kwargs) + + return wrapper return decorator From 534d9dea10847ed0efbc289c3591e0f671c05765 Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Tue, 24 Jan 2023 08:28:48 +0000 Subject: [PATCH 156/492] scons: Raise bin size limit for sanitized builds. Sanitizers can enlarge binary size dramatically, north of 2GB. This can prevent successful linkage due to symbol relocation outside of the 2GB region allocated by the small x86_64 code model that is enabled by default (32-bit relative offset limitation).
Switching to the medium model in x86_64 enables 64-bit relative offset for large objects (>64KB by default) while sticking to 32-bit relative addressing for code and smaller objects. Note this comes at a potential performance cost so it should not be enabled in all cases. This should still be a very happy medium for non-perf-critical sanitized builds. Jira issue: https://gem5.atlassian.net/browse/GEM5-1313 Change-Id: I9aceacfcda99cc29c8fb24b7c69aaab019ce97fd Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67451 Tested-by: kokoro Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power --- SConstruct | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/SConstruct b/SConstruct index e08c2984e5..43ca3ac118 100755 --- a/SConstruct +++ b/SConstruct @@ -290,6 +290,17 @@ main['CLANG'] = CXX_version and CXX_version.find('clang') >= 0 if main['GCC'] + main['CLANG'] > 1: error('Two compilers enabled at once?') +# Find the gem5 binary target architecture (usually host architecture). The +# "Target: " is consistent accross gcc and clang at the time of +# writting this. +bin_target_arch = readCommand([main['CXX'], '--verbose'], exception=False) +main["BIN_TARGET_ARCH"] = ( + "x86_64" + if bin_target_arch.find("Target: x86_64") != -1 + else "aarch64" + if bin_target_arch.find("Target: aarch64") != -1 + else "unknown" +) ######################################################################## # @@ -516,6 +527,35 @@ for variant_path in variant_paths: env.Append(CCFLAGS=['-fsanitize=%s' % sanitizers, '-fno-omit-frame-pointer'], LINKFLAGS='-fsanitize=%s' % sanitizers) + if main["BIN_TARGET_ARCH"] == "x86_64": + # Sanitizers can enlarge binary size drammatically, north of + # 2GB. This can prevent successful linkage due to symbol + # relocation outside from the 2GB region allocated by the small + # x86_64 code model that is enabled by default (32-bit relative + # offset limitation). 
Switching to the medium model in x86_64 + # enables 64-bit relative offset for large objects (>64KB by + # default) while sticking to 32-bit relative addressing for + # code and smaller objects. Note this comes at a potential + # performance cost so it should not be enabled in all cases. + # This should still be a very happy medium for + # non-perf-critical sanitized builds. + env.Append(CCFLAGS='-mcmodel=medium') + env.Append(LINKFLAGS='-mcmodel=medium') + elif main["BIN_TARGET_ARCH"] == "aarch64": + # aarch64 default code model is small but with different + # constrains than for x86_64. With aarch64, the small code + # model enables 4GB distance between symbols. This is + # sufficient for the largest ALL/gem5.debug target with all + # sanitizers enabled at the time of writting this. Note that + # the next aarch64 code model is "large" which prevents dynamic + # linkage so it should be avoided when possible. + pass + else: + warning( + "Unknown code model options for your architecture. " + "Linkage might fail for larger binaries " + "(e.g., ALL/gem5.debug with sanitizers enabled)." + ) else: warning("Don't know how to enable %s sanitizer(s) for your " "compiler." % sanitizers) From d48e53e0a2cece73856b8ae91cd537654ac6581b Mon Sep 17 00:00:00 2001 From: Johnny Date: Wed, 18 Jan 2023 13:43:49 +0800 Subject: [PATCH 157/492] scons: force libasan to static linking The asan sanitizer (enabled with --with-asan) intercepts calls to dlopen(). This replaces the RUNPATH of the executable with the RUNPATH of libasan.so once libasan.so has been loaded by the loader. As a result, some shared libraries may not be found, producing errors like "cannot open shared object file: No such file or directory", because the RUNPATH is no longer correct. Forcing libasan to be linked statically avoids this issue: libasan.a does not have a RUNPATH, so the replacement never happens.
Change-Id: I8e5ff4d1fbe4644a258054be6e9f6d4db9062e56 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67298 Reviewed-by: Earl Ou Tested-by: kokoro Maintainer: Gabe Black --- SConstruct | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/SConstruct b/SConstruct index 43ca3ac118..2e525a3407 100755 --- a/SConstruct +++ b/SConstruct @@ -526,7 +526,9 @@ for variant_path in variant_paths: if env['GCC'] or env['CLANG']: env.Append(CCFLAGS=['-fsanitize=%s' % sanitizers, '-fno-omit-frame-pointer'], - LINKFLAGS='-fsanitize=%s' % sanitizers) + LINKFLAGS=['-fsanitize=%s' % sanitizers, + '-static-libasan']) + if main["BIN_TARGET_ARCH"] == "x86_64": # Sanitizers can enlarge binary size drammatically, north of # 2GB. This can prevent successful linkage due to symbol From a33b4931d7270e0497fdc882f901e0a9fda5edf1 Mon Sep 17 00:00:00 2001 From: Nathanael Premillieu Date: Fri, 27 Jan 2023 15:11:19 +0100 Subject: [PATCH 158/492] mem-cache: schedule already ready pf next cycle Fix a bug where a ready prefetch request was not sent directly because the schedMemSideSendEvent was not called with the right time. This fix mimics what is done in recvTimingResp. 
Change-Id: Ib11f8003ca1b006d976c8cc8ea541434b8902beb Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67473 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Bobby Bruce --- src/mem/cache/base.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index cf6c9fe226..639d02610e 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -464,7 +464,8 @@ BaseCache::recvTimingReq(PacketPtr pkt) if (prefetcher) { // track time of availability of next prefetch, if any - Tick next_pf_time = prefetcher->nextPrefetchReadyTime(); + Tick next_pf_time = std::max( + prefetcher->nextPrefetchReadyTime(), clockEdge()); if (next_pf_time != MaxTick) { schedMemSideSendEvent(next_pf_time); } From 13dca0ebcbc7562e4b26ce40dd91f3bd65052feb Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Wed, 18 Jan 2023 15:22:11 +0000 Subject: [PATCH 159/492] scons: Link tcmalloc_minimal by default instead of tcmalloc tcmalloc triggers asan while tcmalloc_minimal does not. The feature difference is not significant for regular gem5 use. 
Jira issue: https://gem5.atlassian.net/browse/GEM5-1312 Change-Id: I410a26d2ecdf422c456d44276d9e7ec60582b8cc Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67431 Maintainer: Bobby Bruce Tested-by: kokoro Reviewed-by: Bobby Bruce --- SConstruct | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SConstruct b/SConstruct index 2e525a3407..6abbb51e00 100755 --- a/SConstruct +++ b/SConstruct @@ -609,9 +609,9 @@ for variant_path in variant_paths: if not GetOption('without_tcmalloc'): with gem5_scons.Configure(env) as conf: - if conf.CheckLib('tcmalloc'): + if conf.CheckLib('tcmalloc_minimal'): conf.env.Append(CCFLAGS=conf.env['TCMALLOC_CCFLAGS']) - elif conf.CheckLib('tcmalloc_minimal'): + elif conf.CheckLib('tcmalloc'): conf.env.Append(CCFLAGS=conf.env['TCMALLOC_CCFLAGS']) else: warning("You can get a 12% performance improvement by " From e4be93b55f2f71aeaf2cc0a4ef69ed9eed7be152 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 31 Jan 2023 09:38:02 -0800 Subject: [PATCH 160/492] sim: Add some helpers for setting up Signal*Ports in python. The only difference between these types in python are the compatibility strings which restrict what can connect to what. For ports which are generally useful like interrupts or resets, they should have port types with special names and even more restrictive compatibility. For other ports which are one off signals between components, that would be overkill, and these helpers will let you make a signal port which is only restricted to ports which carry the same type of data. The helpers are intended to look similar to their C++ counterpart templates, and are functions which take a type signature as a string as their argument, and return a class which is specialized to use that type signature. The class itself can be stored, or used immediately. 
foo = SignalSourcePort('bool')('A port for the foo signal') Change-Id: If6359b2c69f34ff775cd9aa01272ac487db08bf7 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67511 Maintainer: Gabe Black Tested-by: kokoro Reviewed-by: Yu-hsin Wang --- src/sim/SConscript | 1 + src/sim/SignalPort.py | 77 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 src/sim/SignalPort.py diff --git a/src/sim/SConscript b/src/sim/SConscript index 75b595b8a1..e26676c00a 100644 --- a/src/sim/SConscript +++ b/src/sim/SConscript @@ -42,6 +42,7 @@ SimObject('SubSystem.py', sim_objects=['SubSystem']) SimObject('RedirectPath.py', sim_objects=['RedirectPath']) SimObject('PowerState.py', sim_objects=['PowerState'], enums=['PwrState']) SimObject('PowerDomain.py', sim_objects=['PowerDomain']) +SimObject('SignalPort.py', sim_objects=[]) Source('async.cc') Source('backtrace_%s.cc' % env['BACKTRACE_IMPL'], add_tags='gem5 trace') diff --git a/src/sim/SignalPort.py b/src/sim/SignalPort.py new file mode 100644 index 0000000000..fc529a8b45 --- /dev/null +++ b/src/sim/SignalPort.py @@ -0,0 +1,77 @@ +# Copyright 2023 Google, Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.params import Port, VectorPort + +SIGNAL_SOURCE_ROLE_TEMPLATE = "Signal source <%s>" +SIGNAL_SINK_ROLE_TEMPLATE = "Signal sink <%s>" + + +def SignalSourcePort(type_signature): + source_role = SIGNAL_SOURCE_ROLE_TEMPLATE % type_signature + sink_role = SIGNAL_SINK_ROLE_TEMPLATE % type_signature + Port.compat(source_role, sink_role) + + class SignalSourcePort(Port): + def __init__(self, desc): + super().__init__(source_role, desc, is_source=True) + + return SignalSourcePort + + +def VectorSignalSourcePort(type_signature): + source_role = SIGNAL_SOURCE_ROLE_TEMPLATE % type_signature + sink_role = SIGNAL_SINK_ROLE_TEMPLATE % type_signature + Port.compat(source_role, sink_role) + + class VectorSignalSourcePort(VectorPort): + def __init__(self, desc): + super().__init__(source_role, desc, is_source=True) + + return VectorSignalSourcePort + + +def SignalSinkPort(type_signature): + source_role = SIGNAL_SOURCE_ROLE_TEMPLATE % type_signature + sink_role = SIGNAL_SINK_ROLE_TEMPLATE % type_signature + Port.compat(source_role, sink_role) + + class SignalSinkPort(Port): + def __init__(self, desc): + super().__init__(sink_role, desc) + + return SignalSinkPort + + +def 
VectorSignalSinkPort(type_signature): + source_role = SIGNAL_SOURCE_ROLE_TEMPLATE % type_signature + sink_role = SIGNAL_SINK_ROLE_TEMPLATE % type_signature + Port.compat(source_role, sink_role) + + class VectorSignalSinkPort(VectorPort): + def __init__(self, desc): + super().__init__(sink_role, desc) + + return VectorSignalSinkPort From f2562152e800a8a4af3633e64ca83733cf024abb Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Tue, 15 Nov 2022 13:48:53 +0000 Subject: [PATCH 161/492] arch-riscv,sim-se: Support RV32 register ABI call 1. Add RegABI32, SyscallABI32 2. Support parse function arguments to host and save result to registers 3. Add write to ThreadPointerReg in archClone 4. Support RV32 M5Op syscall Change-Id: Ie327b517f41b5d633d2741b6abb5be955281c838 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65532 Tested-by: kokoro Maintainer: Jason Lowe-Power Reviewed-by: Yu-hsin Wang Reviewed-by: Jason Lowe-Power --- src/arch/riscv/isa/formats/m5ops.isa | 8 ++++++-- src/arch/riscv/linux/linux.hh | 4 +++- src/arch/riscv/linux/se_workload.cc | 4 ++-- src/arch/riscv/linux/se_workload.hh | 4 ++-- src/arch/riscv/reg_abi.cc | 6 ++++++ src/arch/riscv/reg_abi.hh | 29 ++++++++++++++++++++++++++++ src/arch/riscv/se_workload.hh | 22 ++++++++++++++++++--- 7 files changed, 67 insertions(+), 10 deletions(-) diff --git a/src/arch/riscv/isa/formats/m5ops.isa b/src/arch/riscv/isa/formats/m5ops.isa index edc965a7f7..034a0dd2b5 100644 --- a/src/arch/riscv/isa/formats/m5ops.isa +++ b/src/arch/riscv/isa/formats/m5ops.isa @@ -38,8 +38,12 @@ def format M5Op() {{ iop = InstObjParams(name, Name, 'PseudoOp', ''' uint64_t result; - pseudo_inst::pseudoInst(xc->tcBase(), M5FUNC, result); - a0 = result''', + if (machInst.rv_type == RV32) { + pseudo_inst::pseudoInst(xc->tcBase(), M5FUNC, result); + } else { + pseudo_inst::pseudoInst(xc->tcBase(), M5FUNC, result); + } + a0 = rvSext(result)''', ['IsNonSpeculative', 'IsSerializeAfter']) header_output = BasicDeclare.subst(iop) 
decoder_output = BasicConstructor.subst(iop) diff --git a/src/arch/riscv/linux/linux.hh b/src/arch/riscv/linux/linux.hh index b0721836a5..de8bccc85e 100644 --- a/src/arch/riscv/linux/linux.hh +++ b/src/arch/riscv/linux/linux.hh @@ -371,8 +371,10 @@ class RiscvLinux32 : public RiscvLinux, public OpenFlagTable uint64_t stack, uint64_t tls) { ctc->getIsaPtr()->copyRegsFrom(ptc); + if (flags & TGT_CLONE_SETTLS) + ctc->setReg(RiscvISA::ThreadPointerReg, sext<32>(tls)); if (stack) - ctc->setReg(RiscvISA::StackPointerReg, stack); + ctc->setReg(RiscvISA::StackPointerReg, sext<32>(stack)); } }; diff --git a/src/arch/riscv/linux/se_workload.cc b/src/arch/riscv/linux/se_workload.cc index dac28071f4..952fe7140f 100644 --- a/src/arch/riscv/linux/se_workload.cc +++ b/src/arch/riscv/linux/se_workload.cc @@ -123,7 +123,7 @@ unameFunc32(SyscallDesc *desc, ThreadContext *tc, VPtr name) return 0; } -SyscallDescTable EmuLinux::syscallDescs64 = { +SyscallDescTable EmuLinux::syscallDescs64 = { { 0, "io_setup" }, { 1, "io_destroy" }, { 2, "io_submit" }, @@ -462,7 +462,7 @@ SyscallDescTable EmuLinux::syscallDescs64 = { { 2011, "getmainvars" } }; -SyscallDescTable EmuLinux::syscallDescs32 = { +SyscallDescTable EmuLinux::syscallDescs32 = { { 0, "io_setup" }, { 1, "io_destroy" }, { 2, "io_submit" }, diff --git a/src/arch/riscv/linux/se_workload.hh b/src/arch/riscv/linux/se_workload.hh index 41a3d41f61..4ec818b2ab 100644 --- a/src/arch/riscv/linux/se_workload.hh +++ b/src/arch/riscv/linux/se_workload.hh @@ -47,10 +47,10 @@ class EmuLinux : public SEWorkload protected: /// 64 bit syscall descriptors, indexed by call number. - static SyscallDescTable syscallDescs64; + static SyscallDescTable syscallDescs64; /// 32 bit syscall descriptors, indexed by call number. 
- static SyscallDescTable syscallDescs32; + static SyscallDescTable syscallDescs32; public: using Params = RiscvEmuLinuxParams; diff --git a/src/arch/riscv/reg_abi.cc b/src/arch/riscv/reg_abi.cc index b9827f74cf..3d48056b14 100644 --- a/src/arch/riscv/reg_abi.cc +++ b/src/arch/riscv/reg_abi.cc @@ -39,5 +39,11 @@ const std::vector RegABI64::ArgumentRegs = { int_reg::A4, int_reg::A5, int_reg::A6 }; +const std::vector RegABI32::ArgumentRegs = { + int_reg::A0, int_reg::A1, int_reg::A2, int_reg::A3, + int_reg::A4, int_reg::A5, int_reg::A6 +}; + + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/reg_abi.hh b/src/arch/riscv/reg_abi.hh index 3419c31222..4c965321f7 100644 --- a/src/arch/riscv/reg_abi.hh +++ b/src/arch/riscv/reg_abi.hh @@ -44,7 +44,36 @@ struct RegABI64 : public GenericSyscallABI64 static const std::vector ArgumentRegs; }; +struct RegABI32 : public GenericSyscallABI32 +{ + static const std::vector ArgumentRegs; +}; + } // namespace RiscvISA + +namespace guest_abi +{ + +// This method will be used if the size of argument type of function is +// greater than 4 for Riscv 32. +template +struct Argument && + std::is_integral_v && + ABI::template IsWideV>> +{ + static Arg + get(ThreadContext *tc, typename ABI::State &state) + { + panic_if(state >= ABI::ArgumentRegs.size(), + "Ran out of syscall argument registers."); + return bits(tc->getReg(ABI::ArgumentRegs[state++]), 31, 0); + } +}; + +} + } // namespace gem5 #endif // __ARCH_RISCV_REG_ABI_HH__ diff --git a/src/arch/riscv/se_workload.hh b/src/arch/riscv/se_workload.hh index 9ae3be4c05..dd18a92905 100644 --- a/src/arch/riscv/se_workload.hh +++ b/src/arch/riscv/se_workload.hh @@ -60,8 +60,8 @@ class SEWorkload : public gem5::SEWorkload loader::Arch getArch() const override { return loader::Riscv64; } - //FIXME RISCV needs to handle 64 bit arguments in its 32 bit ISA. 
- using SyscallABI = RegABI64; + using SyscallABI64 = RegABI64; + using SyscallABI32 = RegABI32; }; } // namespace RiscvISA @@ -70,7 +70,7 @@ namespace guest_abi { template <> -struct Result +struct Result { static void store(ThreadContext *tc, const SyscallReturn &ret) @@ -85,6 +85,22 @@ struct Result } }; +template <> +struct Result +{ + static void + store(ThreadContext *tc, const SyscallReturn &ret) + { + if (ret.successful()) { + // no error + tc->setReg(RiscvISA::ReturnValueReg, sext<32>(ret.returnValue())); + } else { + // got an error, return details + tc->setReg(RiscvISA::ReturnValueReg, sext<32>(ret.encodedValue())); + } + } +}; + } // namespace guest_abi } // namespace gem5 From 1b949e975922b03e5366011a646c3a41b298dc51 Mon Sep 17 00:00:00 2001 From: Earl Ou Date: Wed, 1 Feb 2023 21:55:05 -0800 Subject: [PATCH 162/492] dev: terminal: run pollevent in terminal eventq Change-Id: Idefda0ca1cd71d3e790d470458fa1cd370393c4a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67532 Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- src/dev/serial/terminal.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/dev/serial/terminal.cc b/src/dev/serial/terminal.cc index 52dbb9ede6..d4108a372d 100644 --- a/src/dev/serial/terminal.cc +++ b/src/dev/serial/terminal.cc @@ -87,6 +87,10 @@ Terminal::ListenEvent::ListenEvent(Terminal *t, int fd, int e) void Terminal::ListenEvent::process(int revent) { + // As a consequence of being called from the PollQueue, we might + // have been called from a different thread. Migrate to "our" + // thread. + EventQueue::ScopedMigration migrate(term->eventQueue()); term->accept(); } From 3bdbe482c23369f2c19c4074d252858e35552341 Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Tue, 24 Jan 2023 09:59:30 +0000 Subject: [PATCH 163/492] base: Strengthen safe_cast and make it work for reference types safe_cast now supports the exact same types as dynamic_cast would. 
In particular, it now supports l-value references and rejects r-value references. The non-debug version has also been updated to make it build only in the same cases as the debug version of safe_cast would. Change-Id: I86692561c169b1ad063000c990a52ea80c6637ca Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67453 Reviewed-by: Giacomo Travaglini Maintainer: Giacomo Travaglini Tested-by: kokoro --- src/base/cast.hh | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/base/cast.hh b/src/base/cast.hh index cdc3c624a7..01464d9b2c 100644 --- a/src/base/cast.hh +++ b/src/base/cast.hh @@ -30,6 +30,8 @@ #define __BASE_CAST_HH__ #include +#include +#include "base/logging.hh" namespace gem5 { @@ -44,10 +46,20 @@ namespace gem5 template inline T -safe_cast(U ptr) +safe_cast(U&& ref_or_ptr) { - T ret = dynamic_cast(ptr); - assert(ret); + /* + * srd::forward used in conjunction with forwarding references (template T + * + T&&) ensures that dynamic_cast will see the exact same type that was + * passed to safe_cast (a.k.a., perfect forwarding). + * + * Not using std::forward would make safe_cast compile with references to + * temporary objects and thus return a dangling reference. + */ + T ret = dynamic_cast(std::forward(ref_or_ptr)); + if constexpr (std::is_pointer_v) { + gem5_assert(ret); + } return ret; } @@ -59,9 +71,19 @@ safe_cast(U ptr) template inline T -safe_cast(U ptr) +safe_cast(U&& ref_or_ptr) { - return static_cast(ptr); + /* + * safe_cast should be reserved to polymorphic types while static_cast is + * also allowed for non-polymorphic types. It could make safe_cast able to + * compile in a non-debug build and fail in a debug build. 
+ */ + static_assert(std::is_polymorphic_v< + std::remove_pointer_t< + std::remove_reference_t< + U>> + >); + return static_cast(std::forward(ref_or_ptr)); } #endif From c1b1a702f96adcb12eb1a58ebe6170a229033ad0 Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Wed, 25 Jan 2023 07:28:39 +0000 Subject: [PATCH 164/492] tests: Make the GTestException type accessible to unit tests Change-Id: I654589a3d90377657393d98e75c0697ba0e72c76 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67455 Reviewed-by: Bobby Bruce Reviewed-by: Jason Lowe-Power Tested-by: kokoro Maintainer: Jason Lowe-Power --- src/base/gtest/logging.hh | 8 ++++++++ src/base/gtest/logging_mock.cc | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/base/gtest/logging.hh b/src/base/gtest/logging.hh index 12d4e5afb4..1d5a39c6bb 100644 --- a/src/base/gtest/logging.hh +++ b/src/base/gtest/logging.hh @@ -32,6 +32,14 @@ namespace gem5 { +// This custom exception type will help prevent fatal exceptions from being +// caught by other code in gem5 and let them escape to the gtest framework. +// Unfortunately that results in a somewhat confusing message about an unknown +// exception being thrown after the panic/fatal message has been printed, but +// there will at least be some indication what went wrong. +struct GTestException +{}; + class GTestLogOutput : public std::ostringstream { private: diff --git a/src/base/gtest/logging_mock.cc b/src/base/gtest/logging_mock.cc index 101374eecc..07a20ea1ec 100644 --- a/src/base/gtest/logging_mock.cc +++ b/src/base/gtest/logging_mock.cc @@ -36,14 +36,6 @@ namespace gem5 namespace { -// This custom exception type will help prevent fatal exceptions from being -// caught by other code in gem5 and let them escape to the gtest framework. 
-// Unfortunately that results in a somewhat confusing message about an unknown -// exception being thrown after the panic/fatal message has been printed, but -// there will at least be some indication what went wrong. -struct GTestException -{}; - class GTestLogger : public Logger { public: From d40ed0f82614fdc5bf70b6dadeb5658e08cd6d9f Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Thu, 2 Feb 2023 08:31:07 +0000 Subject: [PATCH 165/492] mem: Deprecate RequestPort and ResponsePort owner ref member The reference can be bound to an invalid object (*nullptr) in situations where no proper owner SimObject can be provided to the port constructor. This rightfully triggers a UBSAN warning. Also, these two classes do not make use of the owner reference member themselves and expose it as a protected member reference to subclasses. This design has several drawbacks: it requires the reference to owner to travel the class hierarchy up and down, losing its true static type in the process; non-private member variables should not be part of the API of such fundamental classes, if only for maintainability; a reference bound from a nullable pointer is a lying API as it hides the optional aspect of ownership. Note that the reference to invalid object can't be properly fixed until the complete removal of the owner reference. This patch lays the path toward that fix.
Change-Id: I8b42bc57d7826656726f7708492c43366f20633a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67551 Reviewed-by: Bobby Bruce Tested-by: kokoro Maintainer: Bobby Bruce Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power --- src/mem/port.cc | 47 +++++++++++++++++++++++++++++++++++++++++------ src/mem/port.hh | 18 +++++++++++++----- 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/src/mem/port.cc b/src/mem/port.cc index 18793d487b..e36323fb74 100644 --- a/src/mem/port.cc +++ b/src/mem/port.cc @@ -120,9 +120,23 @@ DefaultResponsePort defaultResponsePort; /** * Request port */ -RequestPort::RequestPort(const std::string& name, SimObject* _owner, - PortID _id) : Port(name, _id), _responsePort(&defaultResponsePort), - owner(*_owner) +[[deprecated]] +RequestPort::RequestPort(const std::string& name, + SimObject* _owner, + PortID _id): + Port(name, _id), _responsePort(&defaultResponsePort), owner{*_owner} +{ +} + +/*** FIXME: + * The owner reference member is going through a deprecation path. In the + * meantime, it must be initialized but no valid reference is available here. + * Using 1 instead of nullptr prevents warning upon dereference. It should be + * OK until definitive removal of owner. + */ +RequestPort::RequestPort(const std::string& name, PortID _id) : + Port(name, _id), _responsePort(&defaultResponsePort), + owner{*reinterpret_cast(1)} { } @@ -175,9 +189,30 @@ RequestPort::printAddr(Addr a) /** * Response port */ -ResponsePort::ResponsePort(const std::string& name, SimObject* _owner, - PortID id) : Port(name, id), _requestPort(&defaultRequestPort), - defaultBackdoorWarned(false), owner(*_owner) + +[[deprecated]] +ResponsePort::ResponsePort(const std::string& name, + SimObject* _owner, + PortID _id): + Port(name, _id), + _requestPort(&defaultRequestPort), + defaultBackdoorWarned(false), + owner{*_owner} +{ +} + + +/*** FIXME: + * The owner reference member is going through a deprecation path. 
In the + * meantime, it must be initialized but no valid reference is available here. + * Using 1 instead of nullptr prevents warning upon dereference. It should be + * OK until definitive removal of owner. + */ +ResponsePort::ResponsePort(const std::string& name, PortID id) : + Port(name, id), + _requestPort(&defaultRequestPort), + defaultBackdoorWarned(false), + owner{*reinterpret_cast(1)} { } diff --git a/src/mem/port.hh b/src/mem/port.hh index fb0f4b8812..0d61787f62 100644 --- a/src/mem/port.hh +++ b/src/mem/port.hh @@ -86,8 +86,13 @@ class RequestPort: public Port, public AtomicRequestProtocol, SimObject &owner; public: + [[deprecated("RequestPort ownership is deprecated. " + "Owner should now be registered in derived classes.")]] RequestPort(const std::string& name, SimObject* _owner, - PortID id=InvalidPortID); + PortID id=InvalidPortID); + + RequestPort(const std::string& name, PortID id=InvalidPortID); + virtual ~RequestPort(); /** @@ -266,9 +271,7 @@ class RequestPort: public Port, public AtomicRequestProtocol, class [[deprecated]] MasterPort : public RequestPort { public: - MasterPort(const std::string& name, SimObject* _owner, - PortID id=InvalidPortID) : RequestPort(name, _owner, id) - {} + using RequestPort::RequestPort; }; /** @@ -294,8 +297,13 @@ class ResponsePort : public Port, public AtomicResponseProtocol, SimObject& owner; public: + [[deprecated("ResponsePort ownership is deprecated. 
" + "Owner should now be registered in derived classes.")]] ResponsePort(const std::string& name, SimObject* _owner, - PortID id=InvalidPortID); + PortID id=InvalidPortID); + + ResponsePort(const std::string& name, PortID id=InvalidPortID); + virtual ~ResponsePort(); /** From 7f4c92c91032a2e5938e0f4a650ae79c1bf45479 Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Fri, 6 Jan 2023 11:22:26 +0000 Subject: [PATCH 166/492] mem,arch-arm,mem-ruby,cpu: Remove use of deprecated base port owner Change-Id: I29214278c3dd4829c89a6f7c93214b8123912e74 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67452 Reviewed-by: Bobby Bruce Tested-by: kokoro Maintainer: Daniel Carvalho Maintainer: Bobby Bruce Reviewed-by: Daniel Carvalho --- src/arch/amdgpu/common/tlb.hh | 4 +- src/arch/amdgpu/common/tlb_coalescer.hh | 2 +- src/arch/amdgpu/vega/pagetable_walker.hh | 2 +- src/arch/amdgpu/vega/tlb.hh | 4 +- src/arch/amdgpu/vega/tlb_coalescer.hh | 4 +- src/arch/arm/table_walker.cc | 11 +- src/arch/arm/table_walker.hh | 4 +- src/arch/riscv/pagetable_walker.hh | 2 +- src/arch/x86/pagetable_walker.hh | 2 +- src/cpu/kvm/base.hh | 2 +- src/cpu/minor/cpu.hh | 2 +- src/cpu/o3/fetch.cc | 2 +- src/cpu/o3/lsq.cc | 2 +- src/cpu/simple/atomic.cc | 5 +- src/cpu/simple/atomic.hh | 6 +- src/cpu/simple/timing.hh | 2 +- .../directedtest/RubyDirectedTester.hh | 2 +- .../GarnetSyntheticTraffic.hh | 2 +- .../testers/gpu_ruby_test/protocol_tester.hh | 2 +- src/cpu/testers/memtest/memtest.hh | 2 +- src/cpu/testers/rubytest/RubyTester.hh | 2 +- src/cpu/testers/traffic_gen/base.hh | 2 +- src/cpu/testers/traffic_gen/gups_gen.hh | 2 +- src/cpu/trace/trace_cpu.hh | 4 +- src/dev/arm/gic_v3_its.hh | 2 +- src/dev/arm/smmu_v3_ports.cc | 10 +- src/dev/dma_device.cc | 2 +- src/dev/x86/intdev.hh | 2 +- src/gpu-compute/compute_unit.hh | 14 +- src/gpu-compute/lds_state.hh | 2 +- src/learning_gem5/part2/simple_cache.hh | 4 +- src/learning_gem5/part2/simple_memobj.hh | 4 +- src/mem/addr_mapper.hh | 4 +- 
src/mem/bridge.cc | 4 +- src/mem/cache/base.cc | 45 ++++--- src/mem/cache/base.hh | 15 +-- src/mem/cfi_mem.cc | 2 +- src/mem/coherent_xbar.hh | 6 +- src/mem/comm_monitor.hh | 4 +- src/mem/dramsim2.cc | 2 +- src/mem/dramsim3.cc | 2 +- src/mem/external_master.hh | 2 +- src/mem/external_slave.hh | 2 +- src/mem/mem_checker_monitor.hh | 4 +- src/mem/mem_ctrl.cc | 2 +- src/mem/mem_delay.cc | 5 +- src/mem/noncoherent_xbar.hh | 4 +- src/mem/port.cc | 4 +- src/mem/port.hh | 4 +- src/mem/port_terminator.cc | 4 +- src/mem/port_terminator.hh | 8 +- src/mem/port_wrapper.cc | 10 +- src/mem/port_wrapper.hh | 6 +- src/mem/qos/mem_sink.cc | 2 +- src/mem/qport.hh | 13 +- .../slicc_interface/AbstractController.cc | 2 +- src/mem/ruby/system/RubyPort.cc | 126 +++++++++--------- src/mem/ruby/system/RubyPort.hh | 17 ++- src/mem/serial_link.cc | 4 +- src/mem/simple_mem.cc | 2 +- src/mem/sys_bridge.cc | 4 +- src/mem/sys_bridge.hh | 8 +- src/mem/thread_bridge.cc | 4 +- src/mem/token_port.hh | 6 +- src/mem/tport.cc | 2 +- src/sim/system.cc | 2 +- src/sim/system.hh | 4 +- src/sst/outgoing_request_bridge.cc | 2 +- src/systemc/tlm_bridge/gem5_to_tlm.hh | 2 +- src/systemc/tlm_bridge/tlm_to_gem5.hh | 2 +- 70 files changed, 223 insertions(+), 232 deletions(-) diff --git a/src/arch/amdgpu/common/tlb.hh b/src/arch/amdgpu/common/tlb.hh index 6e9014e8aa..9bd0441340 100644 --- a/src/arch/amdgpu/common/tlb.hh +++ b/src/arch/amdgpu/common/tlb.hh @@ -214,7 +214,7 @@ namespace X86ISA public: CpuSidePort(const std::string &_name, GpuTLB * gpu_TLB, PortID _index) - : ResponsePort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { } + : ResponsePort(_name), tlb(gpu_TLB), index(_index) { } protected: GpuTLB *tlb; @@ -241,7 +241,7 @@ namespace X86ISA public: MemSidePort(const std::string &_name, GpuTLB * gpu_TLB, PortID _index) - : RequestPort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { } + : RequestPort(_name), tlb(gpu_TLB), index(_index) { } std::deque retries; diff --git 
a/src/arch/amdgpu/common/tlb_coalescer.hh b/src/arch/amdgpu/common/tlb_coalescer.hh index 6c940b1dd1..59d8ebe888 100644 --- a/src/arch/amdgpu/common/tlb_coalescer.hh +++ b/src/arch/amdgpu/common/tlb_coalescer.hh @@ -124,7 +124,7 @@ class TLBCoalescer : public ClockedObject public: CpuSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer, PortID _index) - : ResponsePort(_name, tlb_coalescer), coalescer(tlb_coalescer), + : ResponsePort(_name), coalescer(tlb_coalescer), index(_index) { } protected: diff --git a/src/arch/amdgpu/vega/pagetable_walker.hh b/src/arch/amdgpu/vega/pagetable_walker.hh index b00c0a0003..2ad0748c14 100644 --- a/src/arch/amdgpu/vega/pagetable_walker.hh +++ b/src/arch/amdgpu/vega/pagetable_walker.hh @@ -59,7 +59,7 @@ class Walker : public ClockedObject { public: WalkerPort(const std::string &_name, Walker * _walker) : - RequestPort(_name, _walker), walker(_walker) + RequestPort(_name), walker(_walker) {} protected: diff --git a/src/arch/amdgpu/vega/tlb.hh b/src/arch/amdgpu/vega/tlb.hh index c38f5914e0..e48962108c 100644 --- a/src/arch/amdgpu/vega/tlb.hh +++ b/src/arch/amdgpu/vega/tlb.hh @@ -215,7 +215,7 @@ class GpuTLB : public ClockedObject public: CpuSidePort(const std::string &_name, GpuTLB * gpu_TLB, PortID _index) - : ResponsePort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { } + : ResponsePort(_name), tlb(gpu_TLB), index(_index) { } protected: GpuTLB *tlb; @@ -242,7 +242,7 @@ class GpuTLB : public ClockedObject public: MemSidePort(const std::string &_name, GpuTLB * gpu_TLB, PortID _index) - : RequestPort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { } + : RequestPort(_name), tlb(gpu_TLB), index(_index) { } std::deque retries; diff --git a/src/arch/amdgpu/vega/tlb_coalescer.hh b/src/arch/amdgpu/vega/tlb_coalescer.hh index 4ff9324715..6b7af60381 100644 --- a/src/arch/amdgpu/vega/tlb_coalescer.hh +++ b/src/arch/amdgpu/vega/tlb_coalescer.hh @@ -137,7 +137,7 @@ class VegaTLBCoalescer : public ClockedObject public: CpuSidePort(const 
std::string &_name, VegaTLBCoalescer *tlb_coalescer, PortID _index) - : ResponsePort(_name, tlb_coalescer), coalescer(tlb_coalescer), + : ResponsePort(_name), coalescer(tlb_coalescer), index(_index) { } protected: @@ -165,7 +165,7 @@ class VegaTLBCoalescer : public ClockedObject public: MemSidePort(const std::string &_name, VegaTLBCoalescer *tlb_coalescer, PortID _index) - : RequestPort(_name, tlb_coalescer), coalescer(tlb_coalescer), + : RequestPort(_name), coalescer(tlb_coalescer), index(_index) { } std::deque retries; diff --git a/src/arch/arm/table_walker.cc b/src/arch/arm/table_walker.cc index bbf102dad7..60f9e3f76e 100644 --- a/src/arch/arm/table_walker.cc +++ b/src/arch/arm/table_walker.cc @@ -62,7 +62,7 @@ using namespace ArmISA; TableWalker::TableWalker(const Params &p) : ClockedObject(p), requestorId(p.sys->getRequestorId(this)), - port(new Port(this, requestorId)), + port(new Port(*this, requestorId)), isStage2(p.is_stage2), tlb(NULL), currState(NULL), pending(false), numSquashable(p.num_squash_per_cycle), @@ -138,10 +138,11 @@ TableWalker::WalkerState::WalkerState() : { } -TableWalker::Port::Port(TableWalker *_walker, RequestorID id) - : QueuedRequestPort(_walker->name() + ".port", _walker, - reqQueue, snoopRespQueue), - reqQueue(*_walker, *this), snoopRespQueue(*_walker, *this), +TableWalker::Port::Port(TableWalker& _walker, RequestorID id) + : QueuedRequestPort(_walker.name() + ".port", reqQueue, snoopRespQueue), + owner{_walker}, + reqQueue(_walker, *this), + snoopRespQueue(_walker, *this), requestorId(id) { } diff --git a/src/arch/arm/table_walker.hh b/src/arch/arm/table_walker.hh index 6ba7ffcd73..b511fd44d0 100644 --- a/src/arch/arm/table_walker.hh +++ b/src/arch/arm/table_walker.hh @@ -941,7 +941,7 @@ class TableWalker : public ClockedObject class Port : public QueuedRequestPort { public: - Port(TableWalker* _walker, RequestorID id); + Port(TableWalker& _walker, RequestorID id); void sendFunctionalReq(Addr desc_addr, int size, uint8_t *data, 
Request::Flags flag); @@ -963,6 +963,8 @@ class TableWalker : public ClockedObject Tick delay, Event *event); private: + TableWalker& owner; + /** Packet queue used to store outgoing requests. */ ReqPacketQueue reqQueue; diff --git a/src/arch/riscv/pagetable_walker.hh b/src/arch/riscv/pagetable_walker.hh index 55db814471..b12b263403 100644 --- a/src/arch/riscv/pagetable_walker.hh +++ b/src/arch/riscv/pagetable_walker.hh @@ -68,7 +68,7 @@ namespace RiscvISA { public: WalkerPort(const std::string &_name, Walker * _walker) : - RequestPort(_name, _walker), walker(_walker) + RequestPort(_name), walker(_walker) {} protected: diff --git a/src/arch/x86/pagetable_walker.hh b/src/arch/x86/pagetable_walker.hh index 469be6641c..14e7c9976f 100644 --- a/src/arch/x86/pagetable_walker.hh +++ b/src/arch/x86/pagetable_walker.hh @@ -65,7 +65,7 @@ namespace X86ISA { public: WalkerPort(const std::string &_name, Walker * _walker) : - RequestPort(_name, _walker), walker(_walker) + RequestPort(_name), walker(_walker) {} protected: diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh index 6b4b88af49..2d81c7c7eb 100644 --- a/src/cpu/kvm/base.hh +++ b/src/cpu/kvm/base.hh @@ -601,7 +601,7 @@ class BaseKvmCPU : public BaseCPU public: KVMCpuPort(const std::string &_name, BaseKvmCPU *_cpu) - : RequestPort(_name, _cpu), cpu(_cpu), activeMMIOReqs(0) + : RequestPort(_name), cpu(_cpu), activeMMIOReqs(0) { } /** * Interface to send Atomic or Timing IO request. 
Assumes that the pkt diff --git a/src/cpu/minor/cpu.hh b/src/cpu/minor/cpu.hh index acf4295ac9..a966519c56 100644 --- a/src/cpu/minor/cpu.hh +++ b/src/cpu/minor/cpu.hh @@ -110,7 +110,7 @@ class MinorCPU : public BaseCPU public: MinorCPUPort(const std::string& name_, MinorCPU &cpu_) - : RequestPort(name_, &cpu_), cpu(cpu_) + : RequestPort(name_), cpu(cpu_) { } }; diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index 49416bf754..d3cdd2c761 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -76,7 +76,7 @@ namespace o3 { Fetch::IcachePort::IcachePort(Fetch *_fetch, CPU *_cpu) : - RequestPort(_cpu->name() + ".icache_port", _cpu), fetch(_fetch) + RequestPort(_cpu->name() + ".icache_port"), fetch(_fetch) {} diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc index 654fd67d41..d30a399f92 100644 --- a/src/cpu/o3/lsq.cc +++ b/src/cpu/o3/lsq.cc @@ -65,7 +65,7 @@ namespace o3 { LSQ::DcachePort::DcachePort(LSQ *_lsq, CPU *_cpu) : - RequestPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq), cpu(_cpu) + RequestPort(_cpu->name() + ".dcache_port"), lsq(_lsq), cpu(_cpu) {} LSQ::LSQ(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams ¶ms) diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index d6638b3654..5c9fc29b64 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -78,7 +78,7 @@ AtomicSimpleCPU::AtomicSimpleCPU(const BaseAtomicSimpleCPUParams &p) width(p.width), locked(false), simulate_data_stalls(p.simulate_data_stalls), simulate_inst_stalls(p.simulate_inst_stalls), - icachePort(name() + ".icache_port", this), + icachePort(name() + ".icache_port"), dcachePort(name() + ".dcache_port", this), dcache_access(false), dcache_latency(0), ppCommit(nullptr) @@ -281,8 +281,6 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt) __func__, pkt->getAddr(), pkt->cmdString()); // X86 ISA: Snooping an invalidation for monitor/mwait - AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner); - for (ThreadID tid = 0; tid < cpu->numThreads; 
tid++) { if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) { cpu->wakeup(tid); @@ -312,7 +310,6 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt) __func__, pkt->getAddr(), pkt->cmdString()); // X86 ISA: Snooping an invalidation for monitor/mwait - AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner); for (ThreadID tid = 0; tid < cpu->numThreads; tid++) { if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) { cpu->wakeup(tid); diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh index 6fd790ee2f..a194f107d3 100644 --- a/src/cpu/simple/atomic.hh +++ b/src/cpu/simple/atomic.hh @@ -117,8 +117,8 @@ class AtomicSimpleCPU : public BaseSimpleCPU public: - AtomicCPUPort(const std::string &_name, BaseSimpleCPU* _cpu) - : RequestPort(_name, _cpu) + AtomicCPUPort(const std::string &_name) + : RequestPort(_name) { } protected: @@ -142,7 +142,7 @@ class AtomicSimpleCPU : public BaseSimpleCPU public: AtomicCPUDPort(const std::string &_name, BaseSimpleCPU *_cpu) - : AtomicCPUPort(_name, _cpu), cpu(_cpu) + : AtomicCPUPort(_name), cpu(_cpu) { cacheBlockMask = ~(cpu->cacheLineSize() - 1); } diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh index ca6c0e26a3..86ac7b9358 100644 --- a/src/cpu/simple/timing.hh +++ b/src/cpu/simple/timing.hh @@ -164,7 +164,7 @@ class TimingSimpleCPU : public BaseSimpleCPU public: TimingCPUPort(const std::string& _name, TimingSimpleCPU* _cpu) - : RequestPort(_name, _cpu), cpu(_cpu), + : RequestPort(_name), cpu(_cpu), retryRespEvent([this]{ sendRetryResp(); }, name()) { } diff --git a/src/cpu/testers/directedtest/RubyDirectedTester.hh b/src/cpu/testers/directedtest/RubyDirectedTester.hh index 83f8863573..16d21b8cc0 100644 --- a/src/cpu/testers/directedtest/RubyDirectedTester.hh +++ b/src/cpu/testers/directedtest/RubyDirectedTester.hh @@ -58,7 +58,7 @@ class RubyDirectedTester : public ClockedObject public: CpuPort(const std::string &_name, RubyDirectedTester *_tester, PortID _id) - : RequestPort(_name, _tester, _id), 
tester(_tester) + : RequestPort(_name, _id), tester(_tester) {} protected: diff --git a/src/cpu/testers/garnet_synthetic_traffic/GarnetSyntheticTraffic.hh b/src/cpu/testers/garnet_synthetic_traffic/GarnetSyntheticTraffic.hh index 1667da1afe..def3ed29a2 100644 --- a/src/cpu/testers/garnet_synthetic_traffic/GarnetSyntheticTraffic.hh +++ b/src/cpu/testers/garnet_synthetic_traffic/GarnetSyntheticTraffic.hh @@ -84,7 +84,7 @@ class GarnetSyntheticTraffic : public ClockedObject public: CpuPort(const std::string &_name, GarnetSyntheticTraffic *_tester) - : RequestPort(_name, _tester), tester(_tester) + : RequestPort(_name), tester(_tester) { } protected: diff --git a/src/cpu/testers/gpu_ruby_test/protocol_tester.hh b/src/cpu/testers/gpu_ruby_test/protocol_tester.hh index 758ece50f1..dcd5b35018 100644 --- a/src/cpu/testers/gpu_ruby_test/protocol_tester.hh +++ b/src/cpu/testers/gpu_ruby_test/protocol_tester.hh @@ -74,7 +74,7 @@ class ProtocolTester : public ClockedObject public: SeqPort(const std::string &_name, ProtocolTester *_tester, PortID _id, PortID _index) - : RequestPort(_name, _tester, _id) + : RequestPort(_name, _id) {} protected: diff --git a/src/cpu/testers/memtest/memtest.hh b/src/cpu/testers/memtest/memtest.hh index 2dc1f13dd1..3fd1674191 100644 --- a/src/cpu/testers/memtest/memtest.hh +++ b/src/cpu/testers/memtest/memtest.hh @@ -100,7 +100,7 @@ class MemTest : public ClockedObject public: CpuPort(const std::string &_name, MemTest &_memtest) - : RequestPort(_name, &_memtest), memtest(_memtest) + : RequestPort(_name), memtest(_memtest) { } protected: diff --git a/src/cpu/testers/rubytest/RubyTester.hh b/src/cpu/testers/rubytest/RubyTester.hh index 1a8b993e0a..9397126180 100644 --- a/src/cpu/testers/rubytest/RubyTester.hh +++ b/src/cpu/testers/rubytest/RubyTester.hh @@ -76,7 +76,7 @@ class RubyTester : public ClockedObject CpuPort(const std::string &_name, RubyTester *_tester, PortID _id, PortID _index) - : RequestPort(_name, _tester, _id), tester(_tester), + : 
RequestPort(_name, _id), tester(_tester), globalIdx(_index) {} diff --git a/src/cpu/testers/traffic_gen/base.hh b/src/cpu/testers/traffic_gen/base.hh index 5a9af61009..530da6d718 100644 --- a/src/cpu/testers/traffic_gen/base.hh +++ b/src/cpu/testers/traffic_gen/base.hh @@ -132,7 +132,7 @@ class BaseTrafficGen : public ClockedObject public: TrafficGenPort(const std::string& name, BaseTrafficGen& traffic_gen) - : RequestPort(name, &traffic_gen), trafficGen(traffic_gen) + : RequestPort(name), trafficGen(traffic_gen) { } protected: diff --git a/src/cpu/testers/traffic_gen/gups_gen.hh b/src/cpu/testers/traffic_gen/gups_gen.hh index f33f7dae1e..38865b5480 100644 --- a/src/cpu/testers/traffic_gen/gups_gen.hh +++ b/src/cpu/testers/traffic_gen/gups_gen.hh @@ -87,7 +87,7 @@ class GUPSGen : public ClockedObject public: GenPort(const std::string& name, GUPSGen *owner) : - RequestPort(name, owner), owner(owner), _blocked(false), + RequestPort(name), owner(owner), _blocked(false), blockedPacket(nullptr) {} diff --git a/src/cpu/trace/trace_cpu.hh b/src/cpu/trace/trace_cpu.hh index 9d3ae527d7..87f820fe6d 100644 --- a/src/cpu/trace/trace_cpu.hh +++ b/src/cpu/trace/trace_cpu.hh @@ -218,7 +218,7 @@ class TraceCPU : public BaseCPU public: /** Default constructor. */ IcachePort(TraceCPU* _cpu) : - RequestPort(_cpu->name() + ".icache_port", _cpu), owner(_cpu) + RequestPort(_cpu->name() + ".icache_port"), owner(_cpu) {} public: @@ -258,7 +258,7 @@ class TraceCPU : public BaseCPU public: /** Default constructor. 
*/ DcachePort(TraceCPU* _cpu) : - RequestPort(_cpu->name() + ".dcache_port", _cpu), owner(_cpu) + RequestPort(_cpu->name() + ".dcache_port"), owner(_cpu) {} public: diff --git a/src/dev/arm/gic_v3_its.hh b/src/dev/arm/gic_v3_its.hh index 27293941c9..2e2fc29609 100644 --- a/src/dev/arm/gic_v3_its.hh +++ b/src/dev/arm/gic_v3_its.hh @@ -94,7 +94,7 @@ class Gicv3Its : public BasicPioDevice public: DataPort(const std::string &_name, Gicv3Its &_its) : - RequestPort(_name, &_its), + RequestPort(_name), its(_its) {} diff --git a/src/dev/arm/smmu_v3_ports.cc b/src/dev/arm/smmu_v3_ports.cc index 95915b2bcf..4059be7df7 100644 --- a/src/dev/arm/smmu_v3_ports.cc +++ b/src/dev/arm/smmu_v3_ports.cc @@ -45,7 +45,7 @@ namespace gem5 { SMMURequestPort::SMMURequestPort(const std::string &_name, SMMUv3 &_smmu) : - RequestPort(_name, &_smmu), + RequestPort(_name), smmu(_smmu) {} @@ -63,7 +63,7 @@ SMMURequestPort::recvReqRetry() SMMUTableWalkPort::SMMUTableWalkPort(const std::string &_name, SMMUv3 &_smmu) : - RequestPort(_name, &_smmu), + RequestPort(_name), smmu(_smmu) {} @@ -83,7 +83,7 @@ SMMUDevicePort::SMMUDevicePort(const std::string &_name, SMMUv3DeviceInterface &_ifc, PortID _id) : - QueuedResponsePort(_name, &_ifc, respQueue, _id), + QueuedResponsePort(_name, respQueue, _id), ifc(_ifc), respQueue(_ifc, *this) {} @@ -141,7 +141,7 @@ SMMUControlPort::getAddrRanges() const SMMUATSMemoryPort::SMMUATSMemoryPort(const std::string &_name, SMMUv3DeviceInterface &_ifc) : - QueuedRequestPort(_name, &_ifc, reqQueue, snoopRespQueue), + QueuedRequestPort(_name, reqQueue, snoopRespQueue), ifc(_ifc), reqQueue(_ifc, *this), snoopRespQueue(_ifc, *this) @@ -155,7 +155,7 @@ SMMUATSMemoryPort::recvTimingResp(PacketPtr pkt) SMMUATSDevicePort::SMMUATSDevicePort(const std::string &_name, SMMUv3DeviceInterface &_ifc) : - QueuedResponsePort(_name, &_ifc, respQueue), + QueuedResponsePort(_name, respQueue), ifc(_ifc), respQueue(_ifc, *this) {} diff --git a/src/dev/dma_device.cc b/src/dev/dma_device.cc 
index ee871aa8c3..ebda635442 100644 --- a/src/dev/dma_device.cc +++ b/src/dev/dma_device.cc @@ -57,7 +57,7 @@ namespace gem5 DmaPort::DmaPort(ClockedObject *dev, System *s, uint32_t sid, uint32_t ssid) - : RequestPort(dev->name() + ".dma", dev), + : RequestPort(dev->name() + ".dma"), device(dev), sys(s), requestorId(s->getRequestorId(dev)), sendEvent([this]{ sendDma(); }, dev->name()), defaultSid(sid), defaultSSid(ssid), cacheLineSize(s->cacheLineSize()) diff --git a/src/dev/x86/intdev.hh b/src/dev/x86/intdev.hh index 0c30ef5b57..f410ae4c2e 100644 --- a/src/dev/x86/intdev.hh +++ b/src/dev/x86/intdev.hh @@ -118,7 +118,7 @@ class IntRequestPort : public QueuedRequestPort public: IntRequestPort(const std::string& _name, SimObject* _parent, Device* dev, Tick _latency) : - QueuedRequestPort(_name, _parent, reqQueue, snoopRespQueue), + QueuedRequestPort(_name, reqQueue, snoopRespQueue), reqQueue(*_parent, *this), snoopRespQueue(*_parent, *this), device(dev), latency(_latency) { diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh index fcc4468ec1..cf73aa2723 100644 --- a/src/gpu-compute/compute_unit.hh +++ b/src/gpu-compute/compute_unit.hh @@ -514,7 +514,7 @@ class ComputeUnit : public ClockedObject { public: DataPort(const std::string &_name, ComputeUnit *_cu, PortID id) - : RequestPort(_name, _cu, id), computeUnit(_cu) { } + : RequestPort(_name, id), computeUnit(_cu) { } bool snoopRangeSent; @@ -586,7 +586,7 @@ class ComputeUnit : public ClockedObject { public: ScalarDataPort(const std::string &_name, ComputeUnit *_cu) - : RequestPort(_name, _cu), computeUnit(_cu) + : RequestPort(_name), computeUnit(_cu) { } @@ -657,7 +657,7 @@ class ComputeUnit : public ClockedObject { public: SQCPort(const std::string &_name, ComputeUnit *_cu) - : RequestPort(_name, _cu), computeUnit(_cu) { } + : RequestPort(_name), computeUnit(_cu) { } bool snoopRangeSent; @@ -698,7 +698,7 @@ class ComputeUnit : public ClockedObject { public: DTLBPort(const std::string 
&_name, ComputeUnit *_cu, PortID id) - : RequestPort(_name, _cu, id), computeUnit(_cu), + : RequestPort(_name, id), computeUnit(_cu), stalled(false) { } @@ -745,7 +745,7 @@ class ComputeUnit : public ClockedObject { public: ScalarDTLBPort(const std::string &_name, ComputeUnit *_cu) - : RequestPort(_name, _cu), computeUnit(_cu), stalled(false) + : RequestPort(_name), computeUnit(_cu), stalled(false) { } @@ -773,7 +773,7 @@ class ComputeUnit : public ClockedObject { public: ITLBPort(const std::string &_name, ComputeUnit *_cu) - : RequestPort(_name, _cu), computeUnit(_cu), stalled(false) { } + : RequestPort(_name), computeUnit(_cu), stalled(false) { } bool isStalled() { return stalled; } @@ -815,7 +815,7 @@ class ComputeUnit : public ClockedObject { public: LDSPort(const std::string &_name, ComputeUnit *_cu) - : RequestPort(_name, _cu), computeUnit(_cu) + : RequestPort(_name), computeUnit(_cu) { } diff --git a/src/gpu-compute/lds_state.hh b/src/gpu-compute/lds_state.hh index 5fe259506b..3228b7822c 100644 --- a/src/gpu-compute/lds_state.hh +++ b/src/gpu-compute/lds_state.hh @@ -183,7 +183,7 @@ class LdsState: public ClockedObject { public: CuSidePort(const std::string &_name, LdsState *_ownerLds) : - ResponsePort(_name, _ownerLds), ownerLds(_ownerLds) + ResponsePort(_name), ownerLds(_ownerLds) { } diff --git a/src/learning_gem5/part2/simple_cache.hh b/src/learning_gem5/part2/simple_cache.hh index 8869985ffd..25d195d4f1 100644 --- a/src/learning_gem5/part2/simple_cache.hh +++ b/src/learning_gem5/part2/simple_cache.hh @@ -74,7 +74,7 @@ class SimpleCache : public ClockedObject * Constructor. Just calls the superclass constructor. */ CPUSidePort(const std::string& name, int id, SimpleCache *owner) : - ResponsePort(name, owner), id(id), owner(owner), needRetry(false), + ResponsePort(name), id(id), owner(owner), needRetry(false), blockedPacket(nullptr) { } @@ -154,7 +154,7 @@ class SimpleCache : public ClockedObject * Constructor. Just calls the superclass constructor. 
*/ MemSidePort(const std::string& name, SimpleCache *owner) : - RequestPort(name, owner), owner(owner), blockedPacket(nullptr) + RequestPort(name), owner(owner), blockedPacket(nullptr) { } /** diff --git a/src/learning_gem5/part2/simple_memobj.hh b/src/learning_gem5/part2/simple_memobj.hh index 37afeb161f..9f09d96cd3 100644 --- a/src/learning_gem5/part2/simple_memobj.hh +++ b/src/learning_gem5/part2/simple_memobj.hh @@ -68,7 +68,7 @@ class SimpleMemobj : public SimObject * Constructor. Just calls the superclass constructor. */ CPUSidePort(const std::string& name, SimpleMemobj *owner) : - ResponsePort(name, owner), owner(owner), needRetry(false), + ResponsePort(name), owner(owner), needRetry(false), blockedPacket(nullptr) { } @@ -147,7 +147,7 @@ class SimpleMemobj : public SimObject * Constructor. Just calls the superclass constructor. */ MemSidePort(const std::string& name, SimpleMemobj *owner) : - RequestPort(name, owner), owner(owner), blockedPacket(nullptr) + RequestPort(name), owner(owner), blockedPacket(nullptr) { } /** diff --git a/src/mem/addr_mapper.hh b/src/mem/addr_mapper.hh index 2f37bbaf2e..40a0bb033b 100644 --- a/src/mem/addr_mapper.hh +++ b/src/mem/addr_mapper.hh @@ -101,7 +101,7 @@ class AddrMapper : public SimObject { public: MapperRequestPort(const std::string& _name, AddrMapper& _mapper) - : RequestPort(_name, &_mapper), mapper(_mapper) + : RequestPort(_name), mapper(_mapper) { } protected: @@ -158,7 +158,7 @@ class AddrMapper : public SimObject { public: MapperResponsePort(const std::string& _name, AddrMapper& _mapper) - : ResponsePort(_name, &_mapper), mapper(_mapper) + : ResponsePort(_name), mapper(_mapper) {} protected: diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index 36832ebfc4..fe15de7036 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -58,7 +58,7 @@ Bridge::BridgeResponsePort::BridgeResponsePort(const std::string& _name, BridgeRequestPort& _memSidePort, Cycles _delay, int _resp_limit, std::vector _ranges) - : 
ResponsePort(_name, &_bridge), bridge(_bridge), + : ResponsePort(_name), bridge(_bridge), memSidePort(_memSidePort), delay(_delay), ranges(_ranges.begin(), _ranges.end()), outstandingResponses(0), retryReq(false), respQueueLimit(_resp_limit), @@ -70,7 +70,7 @@ Bridge::BridgeRequestPort::BridgeRequestPort(const std::string& _name, Bridge& _bridge, BridgeResponsePort& _cpuSidePort, Cycles _delay, int _req_limit) - : RequestPort(_name, &_bridge), bridge(_bridge), + : RequestPort(_name), bridge(_bridge), cpuSidePort(_cpuSidePort), delay(_delay), reqQueueLimit(_req_limit), sendEvent([this]{ trySendTiming(); }, _name) diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index 639d02610e..87c44cefb7 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -67,10 +67,11 @@ namespace gem5 { BaseCache::CacheResponsePort::CacheResponsePort(const std::string &_name, - BaseCache *_cache, + BaseCache& _cache, const std::string &_label) - : QueuedResponsePort(_name, _cache, queue), - queue(*_cache, *this, true, _label), + : QueuedResponsePort(_name, queue), + cache{_cache}, + queue(_cache, *this, true, _label), blocked(false), mustSendRetry(false), sendRetryEvent([this]{ processSendRetry(); }, _name) { @@ -78,7 +79,7 @@ BaseCache::CacheResponsePort::CacheResponsePort(const std::string &_name, BaseCache::BaseCache(const BaseCacheParams &p, unsigned blk_size) : ClockedObject(p), - cpuSidePort (p.name + ".cpu_side_port", this, "CpuSidePort"), + cpuSidePort (p.name + ".cpu_side_port", *this, "CpuSidePort"), memSidePort(p.name + ".mem_side_port", this, "MemSidePort"), mshrQueue("MSHRs", p.mshrs, 0, p.demand_mshr_reserve, p.name), writeBuffer("write buffer", p.write_buffers, p.mshrs, p.name), @@ -150,7 +151,7 @@ BaseCache::CacheResponsePort::setBlocked() // if we already scheduled a retry in this cycle, but it has not yet // happened, cancel it if (sendRetryEvent.scheduled()) { - owner.deschedule(sendRetryEvent); + cache.deschedule(sendRetryEvent); DPRINTF(CachePort, 
"Port descheduled retry\n"); mustSendRetry = true; } @@ -164,7 +165,7 @@ BaseCache::CacheResponsePort::clearBlocked() blocked = false; if (mustSendRetry) { // @TODO: need to find a better time (next cycle?) - owner.schedule(sendRetryEvent, curTick() + 1); + cache.schedule(sendRetryEvent, curTick() + 1); } } @@ -2522,12 +2523,12 @@ bool BaseCache::CpuSidePort::recvTimingSnoopResp(PacketPtr pkt) { // Snoops shouldn't happen when bypassing caches - assert(!cache->system->bypassCaches()); + assert(!cache.system->bypassCaches()); assert(pkt->isResponse()); // Express snoop responses from requestor to responder, e.g., from L1 to L2 - cache->recvTimingSnoopResp(pkt); + cache.recvTimingSnoopResp(pkt); return true; } @@ -2535,7 +2536,7 @@ BaseCache::CpuSidePort::recvTimingSnoopResp(PacketPtr pkt) bool BaseCache::CpuSidePort::tryTiming(PacketPtr pkt) { - if (cache->system->bypassCaches() || pkt->isExpressSnoop()) { + if (cache.system->bypassCaches() || pkt->isExpressSnoop()) { // always let express snoop packets through even if blocked return true; } else if (blocked || mustSendRetry) { @@ -2552,14 +2553,14 @@ BaseCache::CpuSidePort::recvTimingReq(PacketPtr pkt) { assert(pkt->isRequest()); - if (cache->system->bypassCaches()) { + if (cache.system->bypassCaches()) { // Just forward the packet if caches are disabled. // @todo This should really enqueue the packet rather - [[maybe_unused]] bool success = cache->memSidePort.sendTimingReq(pkt); + [[maybe_unused]] bool success = cache.memSidePort.sendTimingReq(pkt); assert(success); return true; } else if (tryTiming(pkt)) { - cache->recvTimingReq(pkt); + cache.recvTimingReq(pkt); return true; } return false; @@ -2568,39 +2569,39 @@ BaseCache::CpuSidePort::recvTimingReq(PacketPtr pkt) Tick BaseCache::CpuSidePort::recvAtomic(PacketPtr pkt) { - if (cache->system->bypassCaches()) { + if (cache.system->bypassCaches()) { // Forward the request if the system is in cache bypass mode. 
- return cache->memSidePort.sendAtomic(pkt); + return cache.memSidePort.sendAtomic(pkt); } else { - return cache->recvAtomic(pkt); + return cache.recvAtomic(pkt); } } void BaseCache::CpuSidePort::recvFunctional(PacketPtr pkt) { - if (cache->system->bypassCaches()) { + if (cache.system->bypassCaches()) { // The cache should be flushed if we are in cache bypass mode, // so we don't need to check if we need to update anything. - cache->memSidePort.sendFunctional(pkt); + cache.memSidePort.sendFunctional(pkt); return; } // functional request - cache->functionalAccess(pkt, true); + cache.functionalAccess(pkt, true); } AddrRangeList BaseCache::CpuSidePort::getAddrRanges() const { - return cache->getAddrRanges(); + return cache.getAddrRanges(); } BaseCache:: -CpuSidePort::CpuSidePort(const std::string &_name, BaseCache *_cache, +CpuSidePort::CpuSidePort(const std::string &_name, BaseCache& _cache, const std::string &_label) - : CacheResponsePort(_name, _cache, _label), cache(_cache) + : CacheResponsePort(_name, _cache, _label) { } @@ -2687,7 +2688,7 @@ BaseCache::CacheReqPacketQueue::sendDeferredPacket() BaseCache::MemSidePort::MemSidePort(const std::string &_name, BaseCache *_cache, const std::string &_label) - : CacheRequestPort(_name, _cache, _reqQueue, _snoopRespQueue), + : CacheRequestPort(_name, _reqQueue, _snoopRespQueue), _reqQueue(*_cache, *this, _snoopRespQueue, _label), _snoopRespQueue(*_cache, *this, true, _label), cache(_cache) { diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index 78571ceb3c..8a06ec2c42 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -165,10 +165,10 @@ class BaseCache : public ClockedObject protected: - CacheRequestPort(const std::string &_name, BaseCache *_cache, + CacheRequestPort(const std::string &_name, ReqPacketQueue &_reqQueue, SnoopRespPacketQueue &_snoopRespQueue) : - QueuedRequestPort(_name, _cache, _reqQueue, _snoopRespQueue) + QueuedRequestPort(_name, _reqQueue, _snoopRespQueue) { } /** @@ -285,9 
+285,11 @@ class BaseCache : public ClockedObject protected: - CacheResponsePort(const std::string &_name, BaseCache *_cache, + CacheResponsePort(const std::string &_name, BaseCache& _cache, const std::string &_label); + BaseCache& cache; + /** A normal packet queue used to store responses. */ RespPacketQueue queue; @@ -309,11 +311,6 @@ class BaseCache : public ClockedObject */ class CpuSidePort : public CacheResponsePort { - private: - - // a pointer to our specific cache implementation - BaseCache *cache; - protected: virtual bool recvTimingSnoopResp(PacketPtr pkt) override; @@ -329,7 +326,7 @@ class BaseCache : public ClockedObject public: - CpuSidePort(const std::string &_name, BaseCache *_cache, + CpuSidePort(const std::string &_name, BaseCache& _cache, const std::string &_label); }; diff --git a/src/mem/cfi_mem.cc b/src/mem/cfi_mem.cc index f8c1084700..b5354ffcbd 100644 --- a/src/mem/cfi_mem.cc +++ b/src/mem/cfi_mem.cc @@ -464,7 +464,7 @@ CfiMemory::unserialize(CheckpointIn &cp) CfiMemory::MemoryPort::MemoryPort(const std::string& _name, CfiMemory& _memory) - : ResponsePort(_name, &_memory), mem(_memory) + : ResponsePort(_name), mem(_memory) { } AddrRangeList diff --git a/src/mem/coherent_xbar.hh b/src/mem/coherent_xbar.hh index 9693d9225e..454012db8f 100644 --- a/src/mem/coherent_xbar.hh +++ b/src/mem/coherent_xbar.hh @@ -100,7 +100,7 @@ class CoherentXBar : public BaseXBar CoherentXBarResponsePort(const std::string &_name, CoherentXBar &_xbar, PortID _id) - : QueuedResponsePort(_name, &_xbar, queue, _id), xbar(_xbar), + : QueuedResponsePort(_name, queue, _id), xbar(_xbar), queue(_xbar, *this) { } @@ -166,7 +166,7 @@ class CoherentXBar : public BaseXBar CoherentXBarRequestPort(const std::string &_name, CoherentXBar &_xbar, PortID _id) - : RequestPort(_name, &_xbar, _id), xbar(_xbar) + : RequestPort(_name, _id), xbar(_xbar) { } protected: @@ -228,7 +228,7 @@ class CoherentXBar : public BaseXBar */ SnoopRespPort(QueuedResponsePort& cpu_side_port, CoherentXBar& 
_xbar) : - RequestPort(cpu_side_port.name() + ".snoopRespPort", &_xbar), + RequestPort(cpu_side_port.name() + ".snoopRespPort"), cpuSidePort(cpu_side_port) { } /** diff --git a/src/mem/comm_monitor.hh b/src/mem/comm_monitor.hh index b9241090fb..fbaca86c57 100644 --- a/src/mem/comm_monitor.hh +++ b/src/mem/comm_monitor.hh @@ -124,7 +124,7 @@ class CommMonitor : public SimObject public: MonitorRequestPort(const std::string& _name, CommMonitor& _mon) - : RequestPort(_name, &_mon), mon(_mon) + : RequestPort(_name), mon(_mon) { } protected: @@ -190,7 +190,7 @@ class CommMonitor : public SimObject public: MonitorResponsePort(const std::string& _name, CommMonitor& _mon) - : ResponsePort(_name, &_mon), mon(_mon) + : ResponsePort(_name), mon(_mon) { } protected: diff --git a/src/mem/dramsim2.cc b/src/mem/dramsim2.cc index 028ed433fb..9753d690ce 100644 --- a/src/mem/dramsim2.cc +++ b/src/mem/dramsim2.cc @@ -359,7 +359,7 @@ DRAMSim2::drain() DRAMSim2::MemoryPort::MemoryPort(const std::string& _name, DRAMSim2& _memory) - : ResponsePort(_name, &_memory), mem(_memory) + : ResponsePort(_name), mem(_memory) { } AddrRangeList diff --git a/src/mem/dramsim3.cc b/src/mem/dramsim3.cc index fbffc7b579..c07a32a3c1 100644 --- a/src/mem/dramsim3.cc +++ b/src/mem/dramsim3.cc @@ -357,7 +357,7 @@ DRAMsim3::drain() DRAMsim3::MemoryPort::MemoryPort(const std::string& _name, DRAMsim3& _memory) - : ResponsePort(_name, &_memory), mem(_memory) + : ResponsePort(_name), mem(_memory) { } AddrRangeList diff --git a/src/mem/external_master.hh b/src/mem/external_master.hh index 61c41661b4..aad873b35a 100644 --- a/src/mem/external_master.hh +++ b/src/mem/external_master.hh @@ -75,7 +75,7 @@ class ExternalMaster : public SimObject public: ExternalPort(const std::string &name_, ExternalMaster &owner_) : - RequestPort(name_, &owner_), owner(owner_) + RequestPort(name_), owner(owner_) { } ~ExternalPort() { } diff --git a/src/mem/external_slave.hh b/src/mem/external_slave.hh index 17ab42a0b4..404319b557 100644 
--- a/src/mem/external_slave.hh +++ b/src/mem/external_slave.hh @@ -77,7 +77,7 @@ class ExternalSlave : public SimObject public: ExternalPort(const std::string &name_, ExternalSlave &owner_) : - ResponsePort(name_, &owner_), owner(owner_) + ResponsePort(name_), owner(owner_) { } ~ExternalPort() { } diff --git a/src/mem/mem_checker_monitor.hh b/src/mem/mem_checker_monitor.hh index 17fd8eec35..808c3f6f45 100644 --- a/src/mem/mem_checker_monitor.hh +++ b/src/mem/mem_checker_monitor.hh @@ -95,7 +95,7 @@ class MemCheckerMonitor : public SimObject public: MonitorRequestPort(const std::string& _name, MemCheckerMonitor& _mon) - : RequestPort(_name, &_mon), mon(_mon) + : RequestPort(_name), mon(_mon) { } protected: @@ -156,7 +156,7 @@ class MemCheckerMonitor : public SimObject public: MonitorResponsePort(const std::string& _name, MemCheckerMonitor& _mon) - : ResponsePort(_name, &_mon), mon(_mon) + : ResponsePort(_name), mon(_mon) { } protected: diff --git a/src/mem/mem_ctrl.cc b/src/mem/mem_ctrl.cc index beaace1cbf..543d6373d9 100644 --- a/src/mem/mem_ctrl.cc +++ b/src/mem/mem_ctrl.cc @@ -1460,7 +1460,7 @@ MemCtrl::getAddrRanges() MemCtrl::MemoryPort:: MemoryPort(const std::string& name, MemCtrl& _ctrl) - : QueuedResponsePort(name, &_ctrl, queue), queue(_ctrl, *this, true), + : QueuedResponsePort(name, queue), queue(_ctrl, *this, true), ctrl(_ctrl) { } diff --git a/src/mem/mem_delay.cc b/src/mem/mem_delay.cc index 81d40c7053..6aff1a40ab 100644 --- a/src/mem/mem_delay.cc +++ b/src/mem/mem_delay.cc @@ -81,8 +81,7 @@ MemDelay::trySatisfyFunctional(PacketPtr pkt) } MemDelay::RequestPort::RequestPort(const std::string &_name, MemDelay &_parent) - : QueuedRequestPort(_name, &_parent, - _parent.reqQueue, _parent.snoopRespQueue), + : QueuedRequestPort(_name, _parent.reqQueue, _parent.snoopRespQueue), parent(_parent) { } @@ -129,7 +128,7 @@ MemDelay::RequestPort::recvTimingSnoopReq(PacketPtr pkt) MemDelay::ResponsePort:: ResponsePort(const std::string &_name, MemDelay &_parent) - : 
QueuedResponsePort(_name, &_parent, _parent.respQueue), + : QueuedResponsePort(_name, _parent.respQueue), parent(_parent) { } diff --git a/src/mem/noncoherent_xbar.hh b/src/mem/noncoherent_xbar.hh index 03f751b77d..3a7e386fbf 100644 --- a/src/mem/noncoherent_xbar.hh +++ b/src/mem/noncoherent_xbar.hh @@ -96,7 +96,7 @@ class NoncoherentXBar : public BaseXBar NoncoherentXBarResponsePort(const std::string &_name, NoncoherentXBar &_xbar, PortID _id) - : QueuedResponsePort(_name, &_xbar, queue, _id), xbar(_xbar), + : QueuedResponsePort(_name, queue, _id), xbar(_xbar), queue(_xbar, *this) { } @@ -156,7 +156,7 @@ class NoncoherentXBar : public BaseXBar NoncoherentXBarRequestPort(const std::string &_name, NoncoherentXBar &_xbar, PortID _id) - : RequestPort(_name, &_xbar, _id), xbar(_xbar) + : RequestPort(_name, _id), xbar(_xbar) { } protected: diff --git a/src/mem/port.cc b/src/mem/port.cc index e36323fb74..2a253b91a3 100644 --- a/src/mem/port.cc +++ b/src/mem/port.cc @@ -64,7 +64,7 @@ class DefaultRequestPort : public RequestPort } public: - DefaultRequestPort() : RequestPort("default_request_port", nullptr) {} + DefaultRequestPort() : RequestPort("default_request_port") {} // Atomic protocol. Tick recvAtomicSnoop(PacketPtr) override { blowUp(); } @@ -89,7 +89,7 @@ class DefaultResponsePort : public ResponsePort } public: - DefaultResponsePort() : ResponsePort("default_response_port", nullptr) {} + DefaultResponsePort() : ResponsePort("default_response_port") {} // Atomic protocol. 
Tick recvAtomic(PacketPtr) override { blowUp(); } diff --git a/src/mem/port.hh b/src/mem/port.hh index 0d61787f62..a3acffc427 100644 --- a/src/mem/port.hh +++ b/src/mem/port.hh @@ -480,9 +480,7 @@ class ResponsePort : public Port, public AtomicResponseProtocol, class [[deprecated]] SlavePort : public ResponsePort { public: - SlavePort(const std::string& name, SimObject* _owner, - PortID id=InvalidPortID) : ResponsePort(name, _owner, id) - {} + using ResponsePort::ResponsePort; }; inline Tick diff --git a/src/mem/port_terminator.cc b/src/mem/port_terminator.cc index 725acdb2d8..6606a8ff8c 100644 --- a/src/mem/port_terminator.cc +++ b/src/mem/port_terminator.cc @@ -34,11 +34,11 @@ PortTerminator::PortTerminator(const PortTerminatorParams &params): SimObject(params) { for (int i = 0; i < params.port_req_ports_connection_count; ++i) { - reqPorts.emplace_back(name() + ".req_ports" + std::to_string(i), this); + reqPorts.emplace_back(name() + ".req_ports" + std::to_string(i)); } for (int j = 0; j < params.port_resp_ports_connection_count; ++j) { reqPorts.emplace_back(name() + ".resp_ports" + - std::to_string(j), this); + std::to_string(j)); } } diff --git a/src/mem/port_terminator.hh b/src/mem/port_terminator.hh index 233b66d1ac..e95598a29f 100644 --- a/src/mem/port_terminator.hh +++ b/src/mem/port_terminator.hh @@ -66,8 +66,8 @@ class PortTerminator : public SimObject class ReqPort : public RequestPort { public: - ReqPort(const std::string &name, PortTerminator *owner): - RequestPort(name, owner) + ReqPort(const std::string &name): + RequestPort(name) {} protected: bool recvTimingResp(PacketPtr pkt) override @@ -97,8 +97,8 @@ class PortTerminator : public SimObject class RespPort : public ResponsePort { public: - RespPort(const std::string &name, PortTerminator *owner): - ResponsePort(name, owner) + RespPort(const std::string &name): + ResponsePort(name) {} }; diff --git a/src/mem/port_wrapper.cc b/src/mem/port_wrapper.cc index fd5ebbd614..3b61fb2e97 100644 ---
a/src/mem/port_wrapper.cc +++ b/src/mem/port_wrapper.cc @@ -30,9 +30,8 @@ namespace gem5 { -RequestPortWrapper::RequestPortWrapper(const std::string& name, - SimObject* _owner, PortID id) - : RequestPort(name, _owner, id) +RequestPortWrapper::RequestPortWrapper(const std::string& name, PortID id) + : RequestPort(name, id) { } @@ -74,9 +73,8 @@ RequestPortWrapper::setTimingCallbacks(RecvTimingRespCallback resp_cb, recvReqRetryCb = std::move(retry_cb); } -ResponsePortWrapper::ResponsePortWrapper(const std::string& name, - SimObject* _owner, PortID id) - : ResponsePort(name, _owner, id) +ResponsePortWrapper::ResponsePortWrapper(const std::string& name, PortID id) + : ResponsePort(name, id) { } diff --git a/src/mem/port_wrapper.hh b/src/mem/port_wrapper.hh index 5dcdd5dc9b..9da118f25d 100644 --- a/src/mem/port_wrapper.hh +++ b/src/mem/port_wrapper.hh @@ -80,8 +80,7 @@ class RequestPortWrapper : public RequestPort using RecvTimingRespCallback = std::function; using RecvReqRetryCallback = std::function; - RequestPortWrapper(const std::string& name, SimObject* _owner, - PortID id = InvalidPortID); + RequestPortWrapper(const std::string& name, PortID id = InvalidPortID); void recvRangeChange() override; @@ -120,8 +119,7 @@ class ResponsePortWrapper : public ResponsePort using RecvRespRetryCallback = std::function; - ResponsePortWrapper(const std::string& name, SimObject* _owner, - PortID id = InvalidPortID); + ResponsePortWrapper(const std::string& name, PortID id = InvalidPortID); AddrRangeList getAddrRanges() const override; diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc index 3ffe7f4d61..b6b77ca9df 100644 --- a/src/mem/qos/mem_sink.cc +++ b/src/mem/qos/mem_sink.cc @@ -352,7 +352,7 @@ MemSinkCtrl::MemSinkCtrlStats::MemSinkCtrlStats(statistics::Group *parent) MemSinkCtrl::MemoryPort::MemoryPort(const std::string& n, MemSinkCtrl& m) - : QueuedResponsePort(n, &m, queue, true), + : QueuedResponsePort(n, queue, true), mem(m), queue(mem, *this, true) {} diff 
--git a/src/mem/qport.hh b/src/mem/qport.hh index 4758f6699b..02282a0172 100644 --- a/src/mem/qport.hh +++ b/src/mem/qport.hh @@ -77,9 +77,10 @@ class QueuedResponsePort : public ResponsePort * behaviuor in a subclass, and provide the latter to the * QueuePort constructor. */ - QueuedResponsePort(const std::string& name, SimObject* owner, - RespPacketQueue &resp_queue, PortID id = InvalidPortID) : - ResponsePort(name, owner, id), respQueue(resp_queue) + QueuedResponsePort(const std::string& name, + RespPacketQueue &resp_queue, + PortID id = InvalidPortID) : + ResponsePort(name, id), respQueue(resp_queue) { } virtual ~QueuedResponsePort() { } @@ -124,17 +125,17 @@ class QueuedRequestPort : public RequestPort public: /** - * Create a QueuedPort with a given name, owner, and a supplied + * Create a QueuedPort with a given name, and a supplied * implementation of two packet queues. The external definition of * the queues enables e.g. the cache to implement a specific queue * behaviuor in a subclass, and provide the latter to the * QueuePort constructor. 
*/ - QueuedRequestPort(const std::string& name, SimObject* owner, + QueuedRequestPort(const std::string& name, ReqPacketQueue &req_queue, SnoopRespPacketQueue &snoop_resp_queue, PortID id = InvalidPortID) : - RequestPort(name, owner, id), reqQueue(req_queue), + RequestPort(name, id), reqQueue(req_queue), snoopRespQueue(snoop_resp_queue) { } diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index 2d13a5a9b6..2d10422487 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -455,7 +455,7 @@ AbstractController::MemoryPort::recvReqRetry() AbstractController::MemoryPort::MemoryPort(const std::string &_name, AbstractController *_controller, PortID id) - : RequestPort(_name, _controller, id), controller(_controller) + : RequestPort(_name, id), controller(_controller) { } diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index 48f655d007..ae21dc95ad 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -62,10 +62,10 @@ RubyPort::RubyPort(const Params &p) : ClockedObject(p), m_ruby_system(p.ruby_system), m_version(p.version), m_controller(NULL), m_mandatory_q_ptr(NULL), m_usingRubyTester(p.using_ruby_tester), system(p.system), - pioRequestPort(csprintf("%s.pio-request-port", name()), this), - pioResponsePort(csprintf("%s.pio-response-port", name()), this), - memRequestPort(csprintf("%s.mem-request-port", name()), this), - memResponsePort(csprintf("%s-mem-response-port", name()), this, + pioRequestPort(csprintf("%s.pio-request-port", name()), *this), + pioResponsePort(csprintf("%s.pio-response-port", name()), *this), + memRequestPort(csprintf("%s.mem-request-port", name()), *this), + memResponsePort(csprintf("%s-mem-response-port", name()), *this, p.ruby_system->getAccessBackingStore(), -1, p.no_retry_on_stall), gotAddrRanges(p.port_interrupt_out_port_connection_count), @@ -76,7 
+76,7 @@ RubyPort::RubyPort(const Params &p) // create the response ports based on the number of connected ports for (size_t i = 0; i < p.port_in_ports_connection_count; ++i) { response_ports.push_back(new MemResponsePort(csprintf - ("%s.response_ports%d", name(), i), this, + ("%s.response_ports%d", name(), i), *this, p.ruby_system->getAccessBackingStore(), i, p.no_retry_on_stall)); } @@ -84,7 +84,7 @@ RubyPort::RubyPort(const Params &p) // create the request ports based on the number of connected ports for (size_t i = 0; i < p.port_interrupt_out_port_connection_count; ++i) { request_ports.push_back(new PioRequestPort(csprintf( - "%s.request_ports%d", name(), i), this)); + "%s.request_ports%d", name(), i), *this)); } } @@ -134,35 +134,41 @@ RubyPort::getPort(const std::string &if_name, PortID idx) } RubyPort::PioRequestPort::PioRequestPort(const std::string &_name, - RubyPort *_port) - : QueuedRequestPort(_name, _port, reqQueue, snoopRespQueue), - reqQueue(*_port, *this), snoopRespQueue(*_port, *this) + RubyPort& _port) : + QueuedRequestPort(_name, reqQueue, snoopRespQueue), + owner{_port}, + reqQueue(_port, *this), + snoopRespQueue(_port, *this) { DPRINTF(RubyPort, "Created request pioport on sequencer %s\n", _name); } RubyPort::PioResponsePort::PioResponsePort(const std::string &_name, - RubyPort *_port) - : QueuedResponsePort(_name, _port, queue), queue(*_port, *this) + RubyPort& _port) + : QueuedResponsePort(_name, queue), owner{_port}, queue(_port, *this) { DPRINTF(RubyPort, "Created response pioport on sequencer %s\n", _name); } RubyPort::MemRequestPort::MemRequestPort(const std::string &_name, - RubyPort *_port) - : QueuedRequestPort(_name, _port, reqQueue, snoopRespQueue), - reqQueue(*_port, *this), snoopRespQueue(*_port, *this) + RubyPort& _port): + QueuedRequestPort(_name, reqQueue, snoopRespQueue), + owner{_port}, + reqQueue(_port, *this), + snoopRespQueue(_port, *this) { DPRINTF(RubyPort, "Created request memport on ruby sequencer %s\n", _name); } 
RubyPort:: -MemResponsePort::MemResponsePort(const std::string &_name, RubyPort *_port, - bool _access_backing_store, PortID id, - bool _no_retry_on_stall) - : QueuedResponsePort(_name, _port, queue, id), queue(*_port, *this), - access_backing_store(_access_backing_store), - no_retry_on_stall(_no_retry_on_stall) +MemResponsePort::MemResponsePort(const std::string &_name, RubyPort& _port, + bool _access_backing_store, PortID id, + bool _no_retry_on_stall): + QueuedResponsePort(_name, queue, id), + owner{_port}, + queue(_port, *this), + access_backing_store(_access_backing_store), + no_retry_on_stall(_no_retry_on_stall) { DPRINTF(RubyPort, "Created response memport on ruby sequencer %s\n", _name); @@ -171,12 +177,11 @@ MemResponsePort::MemResponsePort(const std::string &_name, RubyPort *_port, bool RubyPort::PioRequestPort::recvTimingResp(PacketPtr pkt) { - RubyPort *rp = static_cast(&owner); DPRINTF(RubyPort, "Response for address: 0x%#x\n", pkt->getAddr()); // send next cycle - rp->pioResponsePort.schedTimingResp( - pkt, curTick() + rp->m_ruby_system->clockPeriod()); + owner.pioResponsePort.schedTimingResp( + pkt, curTick() + owner.m_ruby_system->clockPeriod()); return true; } @@ -199,8 +204,7 @@ bool RubyPort::MemRequestPort::recvTimingResp(PacketPtr pkt) pkt->getAddr(), port->name()); // attempt to send the response in the next cycle - RubyPort *rp = static_cast(&owner); - port->schedTimingResp(pkt, curTick() + rp->m_ruby_system->clockPeriod()); + port->schedTimingResp(pkt, curTick() + owner.m_ruby_system->clockPeriod()); return true; } @@ -208,16 +212,15 @@ bool RubyPort::MemRequestPort::recvTimingResp(PacketPtr pkt) bool RubyPort::PioResponsePort::recvTimingReq(PacketPtr pkt) { - RubyPort *ruby_port = static_cast(&owner); - for (size_t i = 0; i < ruby_port->request_ports.size(); ++i) { - AddrRangeList l = ruby_port->request_ports[i]->getAddrRanges(); + for (size_t i = 0; i < owner.request_ports.size(); ++i) { + AddrRangeList l = 
owner.request_ports[i]->getAddrRanges(); for (auto it = l.begin(); it != l.end(); ++it) { if (it->contains(pkt->getAddr())) { // generally it is not safe to assume success here as // the port could be blocked [[maybe_unused]] bool success = - ruby_port->request_ports[i]->sendTimingReq(pkt); + owner.request_ports[i]->sendTimingReq(pkt); assert(success); return true; } @@ -229,17 +232,16 @@ RubyPort::PioResponsePort::recvTimingReq(PacketPtr pkt) Tick RubyPort::PioResponsePort::recvAtomic(PacketPtr pkt) { - RubyPort *ruby_port = static_cast(&owner); // Only atomic_noncaching mode supported! - if (!ruby_port->system->bypassCaches()) { + if (!owner.system->bypassCaches()) { panic("Ruby supports atomic accesses only in noncaching mode\n"); } - for (size_t i = 0; i < ruby_port->request_ports.size(); ++i) { - AddrRangeList l = ruby_port->request_ports[i]->getAddrRanges(); + for (size_t i = 0; i < owner.request_ports.size(); ++i) { + AddrRangeList l = owner.request_ports[i]->getAddrRanges(); for (auto it = l.begin(); it != l.end(); ++it) { if (it->contains(pkt->getAddr())) { - return ruby_port->request_ports[i]->sendAtomic(pkt); + return owner.request_ports[i]->sendAtomic(pkt); } } } @@ -251,7 +253,6 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt) { DPRINTF(RubyPort, "Timing request for address %#x on port %d\n", pkt->getAddr(), id); - RubyPort *ruby_port = static_cast(&owner); if (pkt->cacheResponding()) panic("RubyPort should never see request with the " @@ -269,7 +270,7 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt) // pio port. 
if (pkt->cmd != MemCmd::MemSyncReq) { if (!pkt->req->isMemMgmt() && !isPhysMemAddress(pkt)) { - assert(ruby_port->memRequestPort.isConnected()); + assert(owner.memRequestPort.isConnected()); DPRINTF(RubyPort, "Request address %#x assumed to be a " "pio address\n", pkt->getAddr()); @@ -278,8 +279,8 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt) pkt->pushSenderState(new SenderState(this)); // send next cycle - RubySystem *rs = ruby_port->m_ruby_system; - ruby_port->memRequestPort.schedTimingReq(pkt, + RubySystem *rs = owner.m_ruby_system; + owner.memRequestPort.schedTimingReq(pkt, curTick() + rs->clockPeriod()); return true; } @@ -290,7 +291,7 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt) pkt->pushSenderState(new SenderState(this)); // Submit the ruby request - RequestStatus requestStatus = ruby_port->makeRequest(pkt); + RequestStatus requestStatus = owner.makeRequest(pkt); // If the request successfully issued then we should return true. // Otherwise, we need to tell the port to retry at a later point @@ -320,9 +321,8 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt) Tick RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) { - RubyPort *ruby_port = static_cast(&owner); // Only atomic_noncaching mode supported! - if (!ruby_port->system->bypassCaches()) { + if (!owner.system->bypassCaches()) { panic("Ruby supports atomic accesses only in noncaching mode\n"); } @@ -330,7 +330,7 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) // pio port. 
if (pkt->cmd != MemCmd::MemSyncReq) { if (!isPhysMemAddress(pkt)) { - assert(ruby_port->memRequestPort.isConnected()); + assert(owner.memRequestPort.isConnected()); DPRINTF(RubyPort, "Request address %#x assumed to be a " "pio address\n", pkt->getAddr()); @@ -339,8 +339,8 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) pkt->pushSenderState(new SenderState(this)); // send next cycle - Tick req_ticks = ruby_port->memRequestPort.sendAtomic(pkt); - return ruby_port->ticksToCycles(req_ticks); + Tick req_ticks = owner.memRequestPort.sendAtomic(pkt); + return owner.ticksToCycles(req_ticks); } assert(getOffset(pkt->getAddr()) + pkt->getSize() <= @@ -348,7 +348,7 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) } // Find the machine type of memory controller interface - RubySystem *rs = ruby_port->m_ruby_system; + RubySystem *rs = owner.m_ruby_system; static int mem_interface_type = -1; if (mem_interface_type == -1) { if (rs->m_abstract_controls[MachineType_Directory].size() != 0) { @@ -363,7 +363,7 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) } // Find the controller for the target address - MachineID id = ruby_port->m_controller->mapAddressToMachine( + MachineID id = owner.m_controller->mapAddressToMachine( pkt->getAddr(), (MachineType)mem_interface_type); AbstractController *mem_interface = rs->m_abstract_controls[mem_interface_type][id.getNum()]; @@ -376,15 +376,14 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) void RubyPort::MemResponsePort::addToRetryList() { - RubyPort *ruby_port = static_cast(&owner); // // Unless the request port do not want retries (e.g., the Ruby tester), // record the stalled M5 port for later retry when the sequencer // becomes free. 
// - if (!no_retry_on_stall && !ruby_port->onRetryList(this)) { - ruby_port->addToRetryList(this); + if (!no_retry_on_stall && !owner.onRetryList(this)) { + owner.addToRetryList(this); } } @@ -393,15 +392,14 @@ RubyPort::MemResponsePort::recvFunctional(PacketPtr pkt) { DPRINTF(RubyPort, "Functional access for address: %#x\n", pkt->getAddr()); - [[maybe_unused]] RubyPort *rp = static_cast(&owner); - RubySystem *rs = rp->m_ruby_system; + RubySystem *rs = owner.m_ruby_system; // Check for pio requests and directly send them to the dedicated // pio port. if (!isPhysMemAddress(pkt)) { DPRINTF(RubyPort, "Pio Request for address: 0x%#x\n", pkt->getAddr()); - assert(rp->pioRequestPort.isConnected()); - rp->pioRequestPort.sendFunctional(pkt); + assert(owner.pioRequestPort.isConnected()); + owner.pioRequestPort.sendFunctional(pkt); return; } @@ -626,15 +624,14 @@ RubyPort::MemResponsePort::hitCallback(PacketPtr pkt) DPRINTF(RubyPort, "Hit callback needs response %d\n", needsResponse); - RubyPort *ruby_port = static_cast(&owner); - RubySystem *rs = ruby_port->m_ruby_system; + RubySystem *rs = owner.m_ruby_system; if (accessPhysMem) { // We must check device memory first in case it overlaps with the // system memory range. 
- if (ruby_port->system->isDeviceMemAddr(pkt)) { - auto dmem = ruby_port->system->getDeviceMemory(pkt); + if (owner.system->isDeviceMemAddr(pkt)) { + auto dmem = owner.system->getDeviceMemory(pkt); dmem->access(pkt); - } else if (ruby_port->system->isMemAddr(pkt->getAddr())) { + } else if (owner.system->isMemAddr(pkt->getAddr())) { rs->getPhysMem()->access(pkt); } else { panic("Packet is in neither device nor system memory!"); @@ -662,11 +659,10 @@ RubyPort::PioResponsePort::getAddrRanges() const { // at the moment the assumption is that the request port does not care AddrRangeList ranges; - RubyPort *ruby_port = static_cast(&owner); - for (size_t i = 0; i < ruby_port->request_ports.size(); ++i) { + for (size_t i = 0; i < owner.request_ports.size(); ++i) { ranges.splice(ranges.begin(), - ruby_port->request_ports[i]->getAddrRanges()); + owner.request_ports[i]->getAddrRanges()); } for ([[maybe_unused]] const auto &r : ranges) DPRINTF(RubyPort, "%s\n", r.to_string()); @@ -676,8 +672,7 @@ RubyPort::PioResponsePort::getAddrRanges() const bool RubyPort::MemResponsePort::isShadowRomAddress(Addr addr) const { - RubyPort *ruby_port = static_cast(&owner); - AddrRangeList ranges = ruby_port->system->getShadowRomRanges(); + AddrRangeList ranges = owner.system->getShadowRomRanges(); for (auto it = ranges.begin(); it != ranges.end(); ++it) { if (it->contains(addr)) { @@ -691,10 +686,9 @@ RubyPort::MemResponsePort::isShadowRomAddress(Addr addr) const bool RubyPort::MemResponsePort::isPhysMemAddress(PacketPtr pkt) const { - RubyPort *ruby_port = static_cast(&owner); Addr addr = pkt->getAddr(); - return (ruby_port->system->isMemAddr(addr) && !isShadowRomAddress(addr)) - || ruby_port->system->isDeviceMemAddr(pkt); + return (owner.system->isMemAddr(addr) && !isShadowRomAddress(addr)) + || owner.system->isDeviceMemAddr(pkt); } void diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh index e9d073e998..66fe0a7686 100644 --- a/src/mem/ruby/system/RubyPort.hh 
+++ b/src/mem/ruby/system/RubyPort.hh @@ -67,11 +67,12 @@ class RubyPort : public ClockedObject class MemRequestPort : public QueuedRequestPort { private: + RubyPort& owner; ReqPacketQueue reqQueue; SnoopRespPacketQueue snoopRespQueue; public: - MemRequestPort(const std::string &_name, RubyPort *_port); + MemRequestPort(const std::string &_name, RubyPort& _port); protected: bool recvTimingResp(PacketPtr pkt); @@ -81,14 +82,16 @@ class RubyPort : public ClockedObject class MemResponsePort : public QueuedResponsePort { private: + RubyPort& owner; RespPacketQueue queue; bool access_backing_store; bool no_retry_on_stall; public: - MemResponsePort(const std::string &_name, RubyPort *_port, - bool _access_backing_store, - PortID id, bool _no_retry_on_stall); + MemResponsePort(const std::string &_name, + RubyPort& _port, + bool _access_backing_store, + PortID id, bool _no_retry_on_stall); void hitCallback(PacketPtr pkt); void evictionCallback(Addr address); @@ -112,11 +115,12 @@ class RubyPort : public ClockedObject class PioRequestPort : public QueuedRequestPort { private: + RubyPort& owner; ReqPacketQueue reqQueue; SnoopRespPacketQueue snoopRespQueue; public: - PioRequestPort(const std::string &_name, RubyPort *_port); + PioRequestPort(const std::string &_name, RubyPort& _port); protected: bool recvTimingResp(PacketPtr pkt); @@ -126,10 +130,11 @@ class RubyPort : public ClockedObject class PioResponsePort : public QueuedResponsePort { private: + RubyPort& owner; RespPacketQueue queue; public: - PioResponsePort(const std::string &_name, RubyPort *_port); + PioResponsePort(const std::string &_name, RubyPort& _port); protected: bool recvTimingReq(PacketPtr pkt); diff --git a/src/mem/serial_link.cc b/src/mem/serial_link.cc index 7847e4a26b..3c84a769bf 100644 --- a/src/mem/serial_link.cc +++ b/src/mem/serial_link.cc @@ -61,7 +61,7 @@ SerialLinkResponsePort(const std::string& _name, Cycles _delay, int _resp_limit, const std::vector& _ranges) - : ResponsePort(_name, 
&_serial_link), serial_link(_serial_link), + : ResponsePort(_name), serial_link(_serial_link), mem_side_port(_mem_side_port), delay(_delay), ranges(_ranges.begin(), _ranges.end()), outstandingResponses(0), retryReq(false), @@ -75,7 +75,7 @@ SerialLink::SerialLinkRequestPort::SerialLinkRequestPort(const std::string& SerialLinkResponsePort& _cpu_side_port, Cycles _delay, int _req_limit) - : RequestPort(_name, &_serial_link), serial_link(_serial_link), + : RequestPort(_name), serial_link(_serial_link), cpu_side_port(_cpu_side_port), delay(_delay), reqQueueLimit(_req_limit), sendEvent([this]{ trySendTiming(); }, _name) { diff --git a/src/mem/simple_mem.cc b/src/mem/simple_mem.cc index 27fcac1183..a3809c5379 100644 --- a/src/mem/simple_mem.cc +++ b/src/mem/simple_mem.cc @@ -271,7 +271,7 @@ SimpleMemory::drain() SimpleMemory::MemoryPort::MemoryPort(const std::string& _name, SimpleMemory& _memory) - : ResponsePort(_name, &_memory), mem(_memory) + : ResponsePort(_name), mem(_memory) { } AddrRangeList diff --git a/src/mem/sys_bridge.cc b/src/mem/sys_bridge.cc index 3037a1d287..6c05ade942 100644 --- a/src/mem/sys_bridge.cc +++ b/src/mem/sys_bridge.cc @@ -43,9 +43,9 @@ SysBridge::BridgingPort::replaceReqID(PacketPtr pkt) } SysBridge::SysBridge(const SysBridgeParams &p) : SimObject(p), - sourcePort(p.name + ".source_port", this, &targetPort, + sourcePort(p.name + ".source_port", &targetPort, p.target->getRequestorId(this)), - targetPort(p.name + ".target_port", this, &sourcePort, + targetPort(p.name + ".target_port", &sourcePort, p.source->getRequestorId(this)) {} diff --git a/src/mem/sys_bridge.hh b/src/mem/sys_bridge.hh index 15a3fc8270..d26139fc0e 100644 --- a/src/mem/sys_bridge.hh +++ b/src/mem/sys_bridge.hh @@ -130,9 +130,9 @@ class SysBridge : public SimObject SysBridgeSourcePort *sourcePort; public: - SysBridgeTargetPort(const std::string &_name, SimObject *owner, + SysBridgeTargetPort(const std::string &_name, SysBridgeSourcePort *source_port, RequestorID _id) : - 
RequestPort(_name, owner), BridgingPort(_id), + RequestPort(_name), BridgingPort(_id), sourcePort(source_port) { DPRINTF(SysBridge, "Target side requestor ID = %s.\n", _id); @@ -223,9 +223,9 @@ class SysBridge : public SimObject SysBridgeTargetPort *targetPort; public: - SysBridgeSourcePort(const std::string &_name, SimObject *owner, + SysBridgeSourcePort(const std::string &_name, SysBridgeTargetPort *target_port, RequestorID _id) : - ResponsePort(_name, owner), BridgingPort(_id), + ResponsePort(_name), BridgingPort(_id), targetPort(target_port) { DPRINTF(SysBridge, "Source side requestor ID = %s.\n", _id); diff --git a/src/mem/thread_bridge.cc b/src/mem/thread_bridge.cc index efaf19a0e2..0090e4217c 100644 --- a/src/mem/thread_bridge.cc +++ b/src/mem/thread_bridge.cc @@ -40,7 +40,7 @@ ThreadBridge::ThreadBridge(const ThreadBridgeParams &p) ThreadBridge::IncomingPort::IncomingPort(const std::string &name, ThreadBridge &device) - : ResponsePort(name, &device), device_(device) + : ResponsePort(name), device_(device) { } @@ -94,7 +94,7 @@ ThreadBridge::IncomingPort::recvMemBackdoorReq(const MemBackdoorReq &req, ThreadBridge::OutgoingPort::OutgoingPort(const std::string &name, ThreadBridge &device) - : RequestPort(name, &device), device_(device) + : RequestPort(name), device_(device) { } diff --git a/src/mem/token_port.hh b/src/mem/token_port.hh index 8a2d15dce5..1bb8707eae 100644 --- a/src/mem/token_port.hh +++ b/src/mem/token_port.hh @@ -50,7 +50,7 @@ class TokenRequestPort : public RequestPort public: TokenRequestPort(const std::string& name, SimObject* owner, PortID id = InvalidPortID) : - RequestPort(name, owner, id), tokenManager(nullptr) + RequestPort(name, id), tokenManager(nullptr) { } /** @@ -98,9 +98,9 @@ class TokenResponsePort : public ResponsePort void recvRespRetry() override; public: - TokenResponsePort(const std::string& name, ClockedObject *owner, + TokenResponsePort(const std::string& name, PortID id = InvalidPortID) : - ResponsePort(name, owner, id), 
tokenRequestPort(nullptr) + ResponsePort(name, id), tokenRequestPort(nullptr) { } ~TokenResponsePort() { } diff --git a/src/mem/tport.cc b/src/mem/tport.cc index ad8512ce8a..7e8a1093de 100644 --- a/src/mem/tport.cc +++ b/src/mem/tport.cc @@ -46,7 +46,7 @@ namespace gem5 SimpleTimingPort::SimpleTimingPort(const std::string& _name, SimObject* _owner) : - QueuedResponsePort(_name, _owner, queueImpl), queueImpl(*_owner, *this) + QueuedResponsePort(_name, queueImpl), queueImpl(*_owner, *this) { } diff --git a/src/sim/system.cc b/src/sim/system.cc index ee6c70a5d3..806eca3ddf 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -165,7 +165,7 @@ System::Threads::quiesceTick(ContextID id, Tick when) int System::numSystemsRunning = 0; System::System(const Params &p) - : SimObject(p), _systemPort("system_port", this), + : SimObject(p), _systemPort("system_port"), multiThread(p.multi_thread), init_param(p.init_param), physProxy(_systemPort, p.cache_line_size), diff --git a/src/sim/system.hh b/src/sim/system.hh index d691fb8bf8..1d179e962a 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -87,8 +87,8 @@ class System : public SimObject, public PCEventScope /** * Create a system port with a name and an owner. 
*/ - SystemPort(const std::string &_name, SimObject *_owner) - : RequestPort(_name, _owner) + SystemPort(const std::string &_name) + : RequestPort(_name) { } bool diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc index 5f342665fe..58abfbad46 100644 --- a/src/sst/outgoing_request_bridge.cc +++ b/src/sst/outgoing_request_bridge.cc @@ -52,7 +52,7 @@ OutgoingRequestBridge::~OutgoingRequestBridge() OutgoingRequestBridge:: OutgoingRequestPort::OutgoingRequestPort(const std::string &name_, OutgoingRequestBridge* owner_) : - ResponsePort(name_, owner_) + ResponsePort(name_) { owner = owner_; } diff --git a/src/systemc/tlm_bridge/gem5_to_tlm.hh b/src/systemc/tlm_bridge/gem5_to_tlm.hh index 35d6ba3b4d..565a1648bc 100644 --- a/src/systemc/tlm_bridge/gem5_to_tlm.hh +++ b/src/systemc/tlm_bridge/gem5_to_tlm.hh @@ -145,7 +145,7 @@ class Gem5ToTlmBridge : public Gem5ToTlmBridgeBase public: BridgeResponsePort(const std::string &name_, Gem5ToTlmBridge &bridge_) : - ResponsePort(name_, nullptr), bridge(bridge_) + ResponsePort(name_), bridge(bridge_) {} }; diff --git a/src/systemc/tlm_bridge/tlm_to_gem5.hh b/src/systemc/tlm_bridge/tlm_to_gem5.hh index ca5f681c9c..32c477e6f2 100644 --- a/src/systemc/tlm_bridge/tlm_to_gem5.hh +++ b/src/systemc/tlm_bridge/tlm_to_gem5.hh @@ -109,7 +109,7 @@ class TlmToGem5Bridge : public TlmToGem5BridgeBase public: BridgeRequestPort(const std::string &name_, TlmToGem5Bridge &bridge_) : - RequestPort(name_, nullptr), bridge(bridge_) + RequestPort(name_), bridge(bridge_) {} }; From a0f6f85ad14062c8f08a40a64ae0fb6dbd35d551 Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Tue, 24 Jan 2023 10:02:17 +0000 Subject: [PATCH 167/492] sim: Suppress deleted operator= warn in Sys::Threads::const_it Swapping the reference member to threads for a pointer restores trivial copyability and movability.
Change-Id: I18d3a5b908d8575aef198f457b85060aa202bd5f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67454 Reviewed-by: Giacomo Travaglini Maintainer: Giacomo Travaglini Tested-by: kokoro --- src/sim/system.hh | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/sim/system.hh b/src/sim/system.hh index 1d179e962a..d2725c32a9 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -152,19 +152,16 @@ class System : public SimObject, public PCEventScope class const_iterator { private: - const Threads &threads; + Threads const* threads; int pos; friend class Threads; const_iterator(const Threads &_threads, int _pos) : - threads(_threads), pos(_pos) + threads(&_threads), pos(_pos) {} public: - const_iterator(const const_iterator &) = default; - const_iterator &operator = (const const_iterator &) = default; - using iterator_category = std::forward_iterator_tag; using value_type = ThreadContext *; using difference_type = int; @@ -181,16 +178,16 @@ class System : public SimObject, public PCEventScope const_iterator operator ++ (int) { - return const_iterator(threads, pos++); + return const_iterator(*threads, pos++); } - reference operator * () { return threads.thread(pos).context; } - pointer operator -> () { return &threads.thread(pos).context; } + reference operator * () { return threads->thread(pos).context; } + pointer operator -> () { return &threads->thread(pos).context; } bool operator == (const const_iterator &other) const { - return &threads == &other.threads && pos == other.pos; + return threads == other.threads && pos == other.pos; } bool From cd2f8b3e6f091857b11f663fec7a0515107377d7 Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Fri, 20 Jan 2023 09:37:30 +0000 Subject: [PATCH 168/492] base: Enable non-copiable types in gem5_assert message formatting Previous implementation was taking string formatting arguments by value, which requires copiability or movability. 
Took the opportunity to scope the helper functions inside the macro using lambdas. Change-Id: I2cefc18df1e99b70e60e64588df61eb72a3e5166 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67335 Tested-by: kokoro Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce --- src/base/logging.hh | 33 ++++++++++++--------------------- src/base/logging.test.cc | 3 +++ 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/base/logging.hh b/src/base/logging.hh index 8949b0cced..22fd2a84d2 100644 --- a/src/base/logging.hh +++ b/src/base/logging.hh @@ -43,7 +43,6 @@ #include #include -#include #include #include "base/compiler.hh" @@ -289,24 +288,10 @@ class Logger #define NDEBUG_DEFINED 0 #endif -template -inline std::string -_assertMsg(const std::string &format, Args... args) -{ - return std::string(": ") + csprintf(format, args...); -} - -inline const char * -_assertMsg() -{ - return ""; -} - /** * The assert macro will function like a normal assert, but will use panic * instead of straight abort(). This allows to perform some cleaning up in - * ExitLogger::exit() before calling abort(). This macro will not check its - * condition in fast builds, but it must still be valid code. + * ExitLogger::exit() before calling abort(). * * @param cond Condition that is checked; if false -> panic * @param ... Printf-based format string with arguments, extends printout. @@ -315,11 +300,17 @@ _assertMsg() * * @ingroup api_logger */ -#define gem5_assert(cond, ...) \ - do { \ - if (GEM5_UNLIKELY(!NDEBUG_DEFINED && !static_cast(cond))) { \ - panic("assert(" #cond ") failed%s", _assertMsg(__VA_ARGS__)); \ - } \ +#define gem5_assert(cond, ...) \ + do { \ + GEM5_UNLIKELY(NDEBUG_DEFINED || static_cast(cond)) ? \ + void(0) : \ + [](const auto&...
args) { \ + auto msg = [&]{ \ + if constexpr (sizeof...(args) == 0) return ""; \ + else return std::string(": ") + csprintf(args...); \ + }; \ + panic("assert(" #cond ") failed%s", msg()); \ + }(__VA_ARGS__); \ } while (0) /** @} */ // end of api_logger diff --git a/src/base/logging.test.cc b/src/base/logging.test.cc index 38cc6059db..5d10f6e33a 100644 --- a/src/base/logging.test.cc +++ b/src/base/logging.test.cc @@ -553,6 +553,9 @@ TEST(LoggingDeathTest, gem5Assert) gem5_assert(true, "message\n"); ASSERT_DEATH(gem5_assert(false, "message\n"), ::testing::HasSubstr( "panic: assert(false) failed: message\nMemory Usage:")); + ASSERT_DEATH(gem5_assert(false, "%s, %s!\n", "Hello", "World"), + ::testing::HasSubstr( + "panic: assert(false) failed: Hello, World!\nMemory Usage:")); gem5_assert(true); ASSERT_DEATH(gem5_assert(false), ::testing::HasSubstr( "panic: assert(false) failed\nMemory Usage:")); From d7cb6ac2b1b309095820738f5fca3e84a3868b21 Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Fri, 20 Jan 2023 13:01:46 +0000 Subject: [PATCH 169/492] base: Turn all logging.hh macros into expression kind In the previous version, the body of several macros was a statement (do{...} while(0);) and not an expression. In the new version, all macros are expressions. Expressions can be used everywhere a statement is expected and in other locations as well. For instance, expressions can be used with the comma operator. When doing generic programming, the comma operator helps manipulating parameter packs. With a statement-based implementation, (gem5_assert(args > 0), ...) could not be written while perfectly sound. Also, (c1 ? a : c2 ? b : (gem5_assert(c3), c)) is a useful expression to assert completeness of cascaded conditions that cannot be easily and efficiently achieved without an expression kind of assertion.
Change-Id: Ia0efeb15e6deda6b90529a6f0e00ebe2e9b5d2a0 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67336 Maintainer: Bobby Bruce Tested-by: kokoro Reviewed-by: Bobby Bruce --- src/base/logging.hh | 125 +++++++++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 55 deletions(-) diff --git a/src/base/logging.hh b/src/base/logging.hh index 22fd2a84d2..f66423f652 100644 --- a/src/base/logging.hh +++ b/src/base/logging.hh @@ -138,9 +138,10 @@ class Logger const char *prefix; }; - -#define base_message(logger, ...) \ - logger.print(::gem5::Logger::Loc(__FILE__, __LINE__), __VA_ARGS__) +#define base_message(logger, ...) \ + [&log = logger](const auto&... args) { \ + log.print(::gem5::Logger::Loc(__FILE__, __LINE__), args...); \ + }(__VA_ARGS__) /* * Only print the message the first time this expression is @@ -150,19 +151,29 @@ class Logger * would have resulted in a different message thoes messages would be * supressed. */ -#define base_message_once(...) do { \ - static bool once = false; \ - if (!once) { \ - base_message(__VA_ARGS__); \ - once = true; \ - } \ - } while (0) +#define base_message_once(logger, ...) \ + [&log = logger](const auto&... args) { \ + static bool once{false}; \ + if (GEM5_UNLIKELY(!once)) { \ + once = true; \ + base_message(log, args...); \ + } \ + }(__VA_ARGS__) -#define exit_message(logger, ...) \ - do { \ - base_message(logger, __VA_ARGS__); \ - logger.exit_helper(); \ - } while (0) +/* + * logger.exit_helper() can't be called inside the lambda for now as the + * lambda's operator() can't be [[noreturn]]. As a result, exit_message and it' + * s derivative cannot be used in functions without also specifying a return + * value, which is inconvenient if not impossible. + */ + +#define exit_message(logger, ...) \ + ( \ + [&log = logger](const auto&... args) { \ + base_message(log, args...); \ + }(__VA_ARGS__), \ + logger.exit_helper() \ + ) /** * This implements a cprintf based panic() function. 
panic() should @@ -200,13 +211,13 @@ class Logger * * @ingroup api_logger */ -#define panic_if(cond, ...) \ - do { \ - if (GEM5_UNLIKELY(cond)) { \ - panic("panic condition " # cond " occurred: %s", \ - ::gem5::csprintf(__VA_ARGS__)); \ - } \ - } while (0) +#define panic_if(cond, ...) \ + ( \ + GEM5_UNLIKELY(static_cast(cond)) ? \ + panic("panic condition " # cond " occurred: %s", \ + ::gem5::csprintf(__VA_ARGS__)) : \ + void(0) \ + ) /** @@ -222,13 +233,13 @@ class Logger * * @ingroup api_logger */ -#define fatal_if(cond, ...) \ - do { \ - if (GEM5_UNLIKELY(cond)) { \ - fatal("fatal condition " # cond " occurred: %s", \ - ::gem5::csprintf(__VA_ARGS__)); \ - } \ - } while (0) +#define fatal_if(cond, ...) \ + ( \ + GEM5_UNLIKELY(static_cast(cond)) ? \ + fatal("fatal condition " # cond " occurred: %s", \ + ::gem5::csprintf(__VA_ARGS__)) : \ + void(0) \ + ) /** @@ -269,17 +280,20 @@ class Logger * @ingroup api_logger * @{ */ -#define warn_if(cond, ...) \ - do { \ - if (GEM5_UNLIKELY(cond)) \ - warn(__VA_ARGS__); \ - } while (0) +#define warn_if(cond, ...) \ + ( \ + static_cast(cond) ? \ + warn(__VA_ARGS__) : \ + void(0) \ + ) #define warn_if_once(cond, ...) \ - do { \ - if (GEM5_UNLIKELY(cond)) \ - warn_once(__VA_ARGS__); \ - } while (0) + ( \ + static_cast(cond) ? \ + warn_once(__VA_ARGS__) : \ + void(0) \ + ) + /** @} */ // end of api_logger #ifdef NDEBUG @@ -300,25 +314,26 @@ class Logger * * @ingroup api_logger */ -#define gem5_assert(cond, ...) \ - do { \ - GEM5_UNLIKELY(NDEBUG_DEFINED || static_cast(cond)) ? \ - void(0) : \ - [](const auto&... args) { \ - auto msg = [&]{ \ - if constexpr (sizeof...(args) == 0) return ""; \ - else return std::string(": ") + csprintf(args...); \ - }; \ - panic("assert(" #cond ") failed%s", msg()); \ - }(__VA_ARGS__); \ - } while (0) +#define gem5_assert(cond, ...) \ + ( \ + GEM5_UNLIKELY(NDEBUG_DEFINED || static_cast(cond)) ? \ + void(0) : \ + [](const auto&... 
args) { \ + auto msg = [&]{ \ + if constexpr (sizeof...(args) == 0) return ""; \ + else return std::string(": ") + csprintf(args...); \ + }; \ + panic("assert(" #cond ") failed%s", msg()); \ + }(__VA_ARGS__) \ + ) + /** @} */ // end of api_logger -#define chatty_assert(...) \ - do { \ - gem5_assert(__VA_ARGS__); \ - GEM5_DEPRECATED_MACRO(chatty_assert, {}, "Please use gem5_assert()"); \ - } while(0) +#define chatty_assert(...) \ + ( \ + gem5_assert(args...), \ + GEM5_DEPRECATED_MACRO(chatty_assert, {}, "Please use gem5_assert()") \ + ) } // namespace gem5 #endif // __BASE_LOGGING_HH__ From de3dba971c0071540ffac24dd1aa5ffa6baa696b Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 2 Feb 2023 03:26:42 -0800 Subject: [PATCH 170/492] arch-riscv: Get rid of redundant reset fault invocation. It was added in one change, another pending change which also added it was rebased on top of it, and the redundant addition was left in when the second change was submitted. Change-Id: I3faf53bca983d8568af45ec7174c2a064eadc0a6 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67571 Maintainer: Gabe Black Tested-by: kokoro Reviewed-by: Roger Chang --- src/arch/riscv/bare_metal/fs_workload.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/arch/riscv/bare_metal/fs_workload.cc b/src/arch/riscv/bare_metal/fs_workload.cc index 83f541157a..4f7adb31b6 100644 --- a/src/arch/riscv/bare_metal/fs_workload.cc +++ b/src/arch/riscv/bare_metal/fs_workload.cc @@ -59,11 +59,6 @@ BareMetal::initState() { Workload::initState(); - for (auto *tc: system->threads) { - RiscvISA::Reset().invoke(tc); - tc->activate(); - } - warn_if(!bootloader->buildImage().write(system->physProxy), "Could not load sections to memory."); From c853187273bb88118704b1af53cfc3b6e1ea0d29 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 2 Feb 2023 04:50:19 -0800 Subject: [PATCH 171/492] arch: Add a virtual method to the BaseISA to reset its ThreadContext. 
This will be used as part of a generic CPU reset mechanism. Change-Id: I010f6bdaca0cbb6be1799ccdc345c4828515209d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67572 Reviewed-by: Giacomo Travaglini Maintainer: Gabe Black Tested-by: kokoro --- src/arch/generic/isa.hh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/arch/generic/isa.hh b/src/arch/generic/isa.hh index e4e7929385..e9e4d95d7b 100644 --- a/src/arch/generic/isa.hh +++ b/src/arch/generic/isa.hh @@ -43,6 +43,7 @@ #include #include "arch/generic/pcstate.hh" +#include "base/logging.hh" #include "cpu/reg_class.hh" #include "mem/packet.hh" #include "mem/request.hh" @@ -83,6 +84,8 @@ class BaseISA : public SimObject virtual bool inUserMode() const = 0; virtual void copyRegsFrom(ThreadContext *src) = 0; + virtual void resetThread() { panic("Thread reset not implemented."); } + const RegClasses &regClasses() const { return _regClasses; } // Locked memory handling functions. From c9719b44a3ce69b65012ce180142c6014127f718 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 2 Feb 2023 04:53:50 -0800 Subject: [PATCH 172/492] arch-riscv: Implement the resetThread method on the ISA object. This method invokes a Reset fault on the associated ThreadContext.
Change-Id: Ie0725b06e0b506640b9038a986a9c56d9eed7011 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67573 Maintainer: Gabe Black Tested-by: kokoro Reviewed-by: Roger Chang --- src/arch/riscv/isa.cc | 7 +++++++ src/arch/riscv/isa.hh | 2 ++ 2 files changed, 9 insertions(+) diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index 6e4c380d98..3809c61d63 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -34,6 +34,7 @@ #include #include +#include "arch/riscv/faults.hh" #include "arch/riscv/interrupts.hh" #include "arch/riscv/mmu.hh" #include "arch/riscv/pagetable.hh" @@ -723,6 +724,12 @@ ISA::globalClearExclusive() tc->getCpuPtr()->wakeup(tc->threadId()); } +void +ISA::resetThread() +{ + Reset().invoke(tc); +} + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh index e332956972..5a2a610479 100644 --- a/src/arch/riscv/isa.hh +++ b/src/arch/riscv/isa.hh @@ -127,6 +127,8 @@ class ISA : public BaseISA void globalClearExclusive() override; + void resetThread() override; + RiscvType rvType() const { return rv_type; } }; From a2d321d47591615aca2a54ce0b770f8ea4888ba1 Mon Sep 17 00:00:00 2001 From: Wei-Han Chen Date: Tue, 17 Jan 2023 02:52:52 +0000 Subject: [PATCH 173/492] fastmodel: change the constructor of bridges This CL changes the construction of bridges between amba and tlm. This enables us to add parameters when using this bridge. 
Change-Id: I4bbbe8fb1c2573a796a3a0a7976adf3553bbaa86 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67297 Maintainer: Gabe Black Tested-by: kokoro Reviewed-by: Yu-hsin Wang --- src/arch/arm/fastmodel/amba_from_tlm_bridge.cc | 11 +++-------- src/arch/arm/fastmodel/amba_from_tlm_bridge.hh | 7 ++++++- src/arch/arm/fastmodel/amba_to_tlm_bridge.cc | 10 ++-------- src/arch/arm/fastmodel/amba_to_tlm_bridge.hh | 7 ++++++- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc b/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc index 4baf0ef7aa..8db0d6af63 100644 --- a/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc +++ b/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc @@ -37,7 +37,9 @@ namespace gem5 namespace fastmodel { -AmbaFromTlmBridge64::AmbaFromTlmBridge64(const sc_core::sc_module_name& name) : +AmbaFromTlmBridge64::AmbaFromTlmBridge64( + const AmbaFromTlmBridge64Params ¶ms, + const sc_core::sc_module_name& name) : amba_pv::amba_pv_from_tlm_bridge<64>(name), targetProxy("target_proxy"), initiatorProxy("initiator_proxy"), @@ -116,11 +118,4 @@ AmbaFromTlmBridge64::syncControlExtension(amba_pv::amba_pv_transaction &trans) } } // namespace fastmodel - -fastmodel::AmbaFromTlmBridge64 * -AmbaFromTlmBridge64Params::create() const -{ - return new fastmodel::AmbaFromTlmBridge64(name.c_str()); -} - } // namespace gem5 diff --git a/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh b/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh index 8ea8b8a731..11f7b5d400 100644 --- a/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh +++ b/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh @@ -33,6 +33,7 @@ #include "amba_pv.h" #pragma GCC diagnostic pop #include "arch/arm/fastmodel/amba_ports.hh" +#include "params/AmbaFromTlmBridge64.hh" #include "systemc/tlm_port_wrapper.hh" namespace gem5 @@ -46,7 +47,11 @@ namespace fastmodel class AmbaFromTlmBridge64 : public amba_pv::amba_pv_from_tlm_bridge<64> { public: - 
AmbaFromTlmBridge64(const sc_core::sc_module_name &name); + AmbaFromTlmBridge64(const AmbaFromTlmBridge64Params ¶ms, + const sc_core::sc_module_name &name); + AmbaFromTlmBridge64(const AmbaFromTlmBridge64Params ¶ms) : + AmbaFromTlmBridge64(params, params.name.c_str()) + {} gem5::Port &gem5_getPort(const std::string &if_name, int idx=-1) override; diff --git a/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc b/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc index 58f6eeab6b..888e077386 100644 --- a/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc +++ b/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc @@ -71,7 +71,8 @@ struct FarAtomicOpFunctor : public AtomicOpFunctor namespace fastmodel { -AmbaToTlmBridge64::AmbaToTlmBridge64(const sc_core::sc_module_name& name) : +AmbaToTlmBridge64::AmbaToTlmBridge64(const AmbaToTlmBridge64Params ¶ms, + const sc_core::sc_module_name& name) : amba_pv::amba_pv_to_tlm_bridge<64>(name), targetProxy("target_proxy"), initiatorProxy("initiator_proxy"), @@ -198,11 +199,4 @@ AmbaToTlmBridge64::setupControlExtension(amba_pv::amba_pv_transaction &trans) } } // namespace fastmodel - -fastmodel::AmbaToTlmBridge64 * -AmbaToTlmBridge64Params::create() const -{ - return new fastmodel::AmbaToTlmBridge64(name.c_str()); -} - } // namespace gem5 diff --git a/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh b/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh index addaac67f9..176d31fbf1 100644 --- a/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh +++ b/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh @@ -33,6 +33,7 @@ #include "amba_pv.h" #pragma GCC diagnostic pop #include "arch/arm/fastmodel/amba_ports.hh" +#include "params/AmbaToTlmBridge64.hh" #include "systemc/tlm_port_wrapper.hh" namespace gem5 @@ -46,7 +47,11 @@ namespace fastmodel class AmbaToTlmBridge64 : public amba_pv::amba_pv_to_tlm_bridge<64> { public: - AmbaToTlmBridge64(const sc_core::sc_module_name &name); + AmbaToTlmBridge64(const AmbaToTlmBridge64Params ¶ms, + const sc_core::sc_module_name &name); + 
+ AmbaToTlmBridge64(const AmbaToTlmBridge64Params &params) : + AmbaToTlmBridge64(params, params.name.c_str()) + {} gem5::Port &gem5_getPort(const std::string &if_name, int idx=-1) override; From 59e16b5695e3af14324916d98597b6574819dd1f Mon Sep 17 00:00:00 2001 From: Wei-Han Chen Date: Fri, 3 Feb 2023 03:21:27 +0000 Subject: [PATCH 174/492] fastmodel: forward stream ID to gem5 This CL enables forwarding stream ID from amba_pv to gem5 world. The stream ID information is originally stored in master_id of pv::TransactionAttribute, then it will be stored to m_id of amba_pv::amba_pv_extension. This CL brings the information to stream ID field of Gem5SystemC::ControlExtension. Then the information can be set to stream ID of the gem5 packet's request. After bringing the information to gem5, we can identify the packet's stream ID from gem5 side. One example usage is PL330. In PL330_DMAC, each transaction is associated with a stream ID. If we can identify the stream ID, we can, for example, set attribute to specific DMAC channel.
Change-Id: I943ce49fde57b0bcfc18b58c7566eec61cc676f4 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67591 Reviewed-by: Yu-hsin Wang Tested-by: kokoro Maintainer: Gabe Black --- src/arch/arm/fastmodel/FastModel.py | 4 ++++ src/arch/arm/fastmodel/amba_to_tlm_bridge.cc | 7 ++++++- src/arch/arm/fastmodel/amba_to_tlm_bridge.hh | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/arch/arm/fastmodel/FastModel.py b/src/arch/arm/fastmodel/FastModel.py index 8a28522db4..1ea3c5e8d7 100644 --- a/src/arch/arm/fastmodel/FastModel.py +++ b/src/arch/arm/fastmodel/FastModel.py @@ -108,6 +108,10 @@ class AmbaToTlmBridge64(SystemC_ScModule): amba = AmbaTargetSocket(64, "AMBA PV target socket") tlm = TlmInitiatorSocket(64, "TLM initiator socket") + set_stream_id = Param.Bool( + False, "Set this true to forward stream ID to gem5 world" + ) + class AmbaFromTlmBridge64(SystemC_ScModule): type = "AmbaFromTlmBridge64" diff --git a/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc b/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc index 888e077386..2f065fcdfe 100644 --- a/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc +++ b/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc @@ -77,7 +77,8 @@ AmbaToTlmBridge64::AmbaToTlmBridge64(const AmbaToTlmBridge64Params ¶ms, targetProxy("target_proxy"), initiatorProxy("initiator_proxy"), tlmWrapper(initiatorProxy, std::string(name) + ".tlm", -1), - ambaWrapper(amba_pv_s, std::string(name) + ".amba", -1) + ambaWrapper(amba_pv_s, std::string(name) + ".amba", -1), + setStreamId(params.set_stream_id) { targetProxy.register_b_transport(this, &AmbaToTlmBridge64::bTransport); targetProxy.register_get_direct_mem_ptr( @@ -191,6 +192,10 @@ AmbaToTlmBridge64::setupControlExtension(amba_pv::amba_pv_transaction &trans) control_ex->setSecure(!amba_ex->is_non_secure()); control_ex->setInstruction(amba_ex->is_instruction()); + if (setStreamId) { + control_ex->setStreamId(amba_ex->get_id()); + } + if (trans.has_mm()) { 
trans.set_auto_extension(control_ex); } else { diff --git a/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh b/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh index 176d31fbf1..6729604f90 100644 --- a/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh +++ b/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh @@ -71,6 +71,7 @@ class AmbaToTlmBridge64 : public amba_pv::amba_pv_to_tlm_bridge<64> AmbaToTlmBridge64, 64, tlm::tlm_base_protocol_types> initiatorProxy; sc_gem5::TlmInitiatorWrapper<64> tlmWrapper; AmbaTarget ambaWrapper; + bool setStreamId; }; } // namespace fastmodel From a513e06a1baf762d65d1d8c6dc0297542460e8f6 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 2 Feb 2023 07:03:47 -0800 Subject: [PATCH 175/492] fastmodel: Export the reset signals of the GIC. These are the "reset" and "po_reset" lines. It seems reasonable that these are the normal reset and the power on reset signals, but that's not spelled out in the fast model "lisa" file, nor does it explain exactly what the difference is between them. 
Change-Id: I686b4d973fc3cfff8a3ec05f8c95ee2cb6ff6698 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67575 Reviewed-by: Jui-min Lee Maintainer: Gabe Black Tested-by: kokoro --- src/arch/arm/fastmodel/GIC/FastModelGIC.py | 4 ++++ src/arch/arm/fastmodel/GIC/GIC.lisa | 7 +++++++ src/arch/arm/fastmodel/GIC/gic.cc | 21 ++++++++++++++++++++- src/arch/arm/fastmodel/GIC/gic.hh | 5 +++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/arch/arm/fastmodel/GIC/FastModelGIC.py b/src/arch/arm/fastmodel/GIC/FastModelGIC.py index ce0a8c5fb4..b1a9a3c8a1 100644 --- a/src/arch/arm/fastmodel/GIC/FastModelGIC.py +++ b/src/arch/arm/fastmodel/GIC/FastModelGIC.py @@ -42,6 +42,7 @@ from m5.SimObject import SimObject from m5.objects.FastModel import AmbaInitiatorSocket, AmbaTargetSocket from m5.objects.Gic import BaseGic from m5.objects.IntPin import VectorIntSourcePin +from m5.objects.ResetPort import ResetResponsePort from m5.objects.SystemC import SystemC_ScModule GICV3_COMMS_TARGET_ROLE = "GICV3 COMMS TARGET" @@ -850,6 +851,9 @@ class FastModelGIC(BaseGic): wake_request = VectorIntSourcePin("GIC wake request initiator") + reset = ResetResponsePort("Reset") + po_reset = ResetResponsePort("Power on reset") + # Used for DTB autogeneration _state = FdtState(addr_cells=2, size_cells=2, interrupt_cells=3) diff --git a/src/arch/arm/fastmodel/GIC/GIC.lisa b/src/arch/arm/fastmodel/GIC/GIC.lisa index 34b09c8366..5443b55e06 100644 --- a/src/arch/arm/fastmodel/GIC/GIC.lisa +++ b/src/arch/arm/fastmodel/GIC/GIC.lisa @@ -56,6 +56,10 @@ component GIC // Outgoing wake requests. gic.wake_request => self.wake_request; + // Reset signals. + self.normal_reset => gic.reset; + self.po_reset => gic.po_reset; + // Internal ports for PPI and SPI programmatic access. 
self.ppi_0 => gic.ppi_in_0; self.ppi_1 => gic.ppi_in_1; @@ -405,6 +409,9 @@ component GIC } } + slave port normal_reset; + slave port po_reset; + internal slave port spi[988]; internal slave port ppi_0[16]; diff --git a/src/arch/arm/fastmodel/GIC/gic.cc b/src/arch/arm/fastmodel/GIC/gic.cc index 493aa81fcd..5f01cfb8bb 100644 --- a/src/arch/arm/fastmodel/GIC/gic.cc +++ b/src/arch/arm/fastmodel/GIC/gic.cc @@ -72,10 +72,15 @@ SCGIC::Terminator::sendTowardsCPU(uint8_t len, const uint8_t *data) SCGIC::SCGIC(const SCFastModelGICParams ¶ms, sc_core::sc_module_name _name) - : scx_evs_GIC(_name), _params(params) + : scx_evs_GIC(_name), _params(params), + resetPort(params.name + ".reset", 0), + poResetPort(params.name + ".po_reset", 0) { signalInterrupt.bind(signal_interrupt); + resetPort.signal_out.bind(scx_evs_GIC::normal_reset); + poResetPort.signal_out.bind(scx_evs_GIC::po_reset); + for (int i = 0; i < wake_request.size(); i++) { wakeRequests.emplace_back( new SignalReceiver(csprintf("%s.wakerequest[%d]", name(), i))); @@ -298,6 +303,18 @@ SCGIC::SCGIC(const SCFastModelGICParams ¶ms, set_parameter("gic.consolidators", params.consolidators); } +Port & +SCGIC::gem5_getPort(const std::string &if_name, int idx) +{ + if (if_name == "reset") { + return resetPort; + } else if (if_name == "po_reset") { + return poResetPort; + } else { + return scx_evs_GIC::gem5_getPort(if_name, idx); + } +} + void SCGIC::before_end_of_elaboration() { @@ -341,6 +358,8 @@ GIC::getPort(const std::string &if_name, PortID idx) return *ptr; } else if (if_name == "wake_request") { return *wakeRequestPorts.at(idx); + } else if (if_name == "reset" || if_name == "po_reset") { + return scGIC->gem5_getPort(if_name, idx); } else { return BaseGic::getPort(if_name, idx); } diff --git a/src/arch/arm/fastmodel/GIC/gic.hh b/src/arch/arm/fastmodel/GIC/gic.hh index 0e502fc633..070fe3bb77 100644 --- a/src/arch/arm/fastmodel/GIC/gic.hh +++ b/src/arch/arm/fastmodel/GIC/gic.hh @@ -37,6 +37,7 @@ #include 
"arch/arm/fastmodel/amba_ports.hh" #include "arch/arm/fastmodel/common/signal_receiver.hh" +#include "arch/arm/fastmodel/common/signal_sender.hh" #include "dev/arm/base_gic.hh" #include "dev/intpin.hh" #include "params/FastModelGIC.hh" @@ -91,9 +92,13 @@ class SCGIC : public scx_evs_GIC SCGIC(const SCFastModelGICParams &p) : SCGIC(p, p.name.c_str()) {} SCGIC(const SCFastModelGICParams ¶ms, sc_core::sc_module_name _name); + Port &gem5_getPort(const std::string &if_name, int idx) override; + SignalInterruptInitiatorSocket signalInterrupt; std::vector> wakeRequests; + SignalSender resetPort; + SignalSender poResetPort; void before_end_of_elaboration() override; From 8a774e07b259c2d0e5507d39a1c234727c9de5ce Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Mon, 6 Feb 2023 12:51:22 +0000 Subject: [PATCH 176/492] dev-amdgpu: Patch forgotten port after mem port owner deprecation Change-Id: I82f88b8962d9f04521e549ca1383c42f2b5b3ffc Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67631 Maintainer: Bobby Bruce Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Bobby Bruce --- src/mem/ruby/system/GPUCoalescer.cc | 2 +- src/mem/ruby/system/GPUCoalescer.hh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc index a0808faa2b..8bde3f7bc8 100644 --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -190,7 +190,7 @@ GPUCoalescer::GPUCoalescer(const Params &p) false, Event::Progress_Event_Pri), uncoalescedTable(this), deadlockCheckEvent([this]{ wakeup(); }, "GPUCoalescer deadlock check"), - gmTokenPort(name() + ".gmTokenPort", this) + gmTokenPort(name() + ".gmTokenPort") { m_store_waiting_on_load_cycles = 0; m_store_waiting_on_store_cycles = 0; diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh index 1120947fd5..dd28855547 100644 --- 
a/src/mem/ruby/system/GPUCoalescer.hh +++ b/src/mem/ruby/system/GPUCoalescer.hh @@ -216,9 +216,9 @@ class GPUCoalescer : public RubyPort class GMTokenPort : public TokenResponsePort { public: - GMTokenPort(const std::string& name, ClockedObject *owner, + GMTokenPort(const std::string& name, PortID id = InvalidPortID) - : TokenResponsePort(name, owner, id) + : TokenResponsePort(name, id) { } ~GMTokenPort() { } From 7371e468225cfacc871e5b965f99b130a2fcc123 Mon Sep 17 00:00:00 2001 From: Earl Ou Date: Wed, 1 Feb 2023 21:54:05 -0800 Subject: [PATCH 177/492] mem: use default backdoor behavior for thread_bridge The original backdoor implementation is incorrect. We use simply fallback to default (disable backdoor) as backdoor across threads is not thread-safe in most of cases. Change-Id: Ia39be0dda4f16917cc3565eb5b012270e6d7697a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67531 Maintainer: Gabe Black Tested-by: kokoro Reviewed-by: Yu-hsin Wang --- src/mem/thread_bridge.cc | 14 -------------- src/mem/thread_bridge.hh | 4 ---- 2 files changed, 18 deletions(-) diff --git a/src/mem/thread_bridge.cc b/src/mem/thread_bridge.cc index 0090e4217c..5af2a59de3 100644 --- a/src/mem/thread_bridge.cc +++ b/src/mem/thread_bridge.cc @@ -64,12 +64,6 @@ ThreadBridge::IncomingPort::recvRespRetry() // AtomicResponseProtocol Tick -ThreadBridge::IncomingPort::recvAtomicBackdoor(PacketPtr pkt, - MemBackdoorPtr &backdoor) -{ - panic("ThreadBridge only supports atomic/functional access."); -} -Tick ThreadBridge::IncomingPort::recvAtomic(PacketPtr pkt) { EventQueue::ScopedMigration migrate(device_.eventQueue()); @@ -84,14 +78,6 @@ ThreadBridge::IncomingPort::recvFunctional(PacketPtr pkt) device_.out_port_.sendFunctional(pkt); } -void -ThreadBridge::IncomingPort::recvMemBackdoorReq(const MemBackdoorReq &req, - MemBackdoorPtr &backdoor) -{ - EventQueue::ScopedMigration migrate(device_.eventQueue()); - device_.out_port_.sendMemBackdoorReq(req, backdoor); -} - 
ThreadBridge::OutgoingPort::OutgoingPort(const std::string &name, ThreadBridge &device) : RequestPort(name), device_(device) diff --git a/src/mem/thread_bridge.hh b/src/mem/thread_bridge.hh index 92cb078dd1..8a253fdd55 100644 --- a/src/mem/thread_bridge.hh +++ b/src/mem/thread_bridge.hh @@ -55,14 +55,10 @@ class ThreadBridge : public SimObject void recvRespRetry() override; // AtomicResponseProtocol - Tick recvAtomicBackdoor(PacketPtr pkt, - MemBackdoorPtr &backdoor) override; Tick recvAtomic(PacketPtr pkt) override; // FunctionalResponseProtocol void recvFunctional(PacketPtr pkt) override; - void recvMemBackdoorReq(const MemBackdoorReq &req, - MemBackdoorPtr &backdoor) override; private: ThreadBridge &device_; From e44cbe724b7c1746eea17e2e3b71b3806270ec7d Mon Sep 17 00:00:00 2001 From: Earl Ou Date: Wed, 1 Feb 2023 21:55:51 -0800 Subject: [PATCH 178/492] sim: handle async events in main thread only In the current implementation pollqueue is not thread safe. The design of multi threads handle async events is thus causing issue in parallel environment. Given the low rate of async events, it should be OK to only handle them in the main thread to avoid unexpected racing issues. 
Change-Id: Iddd512235e84e9d77f60985bb1771aa4cc693004 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67533 Reviewed-by: Gabe Black Reviewed-by: Yu-hsin Wang Maintainer: Gabe Black Tested-by: kokoro --- src/sim/simulate.cc | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/src/sim/simulate.cc b/src/sim/simulate.cc index 86d516d39a..abd2b1d391 100644 --- a/src/sim/simulate.cc +++ b/src/sim/simulate.cc @@ -43,7 +43,6 @@ #include "sim/simulate.hh" #include -#include #include #include "base/logging.hh" @@ -273,28 +272,6 @@ terminateEventQueueThreads() } -/** - * Test and clear the global async_event flag, such that each time the - * flag is cleared, only one thread returns true (and thus is assigned - * to handle the corresponding async event(s)). - */ -static bool -testAndClearAsyncEvent() -{ - static std::mutex mutex; - - bool was_set = false; - mutex.lock(); - - if (async_event) { - was_set = true; - async_event = false; - } - - mutex.unlock(); - return was_set; -} - /** * The main per-thread simulation loop. This loop is executed by all * simulation threads (the main thread and the subordinate threads) in @@ -307,6 +284,8 @@ doSimLoop(EventQueue *eventq) curEventQueue(eventq); eventq->handleAsyncInsertions(); + bool mainQueue = eventq == getEventQueue(0); + while (1) { // there should always be at least one event (the SimLoopExitEvent // we just scheduled) in the queue @@ -314,7 +293,8 @@ doSimLoop(EventQueue *eventq) assert(curTick() <= eventq->nextTick() && "event scheduled in the past"); - if (async_event && testAndClearAsyncEvent()) { + if (mainQueue && async_event) { + async_event = false; // Take the event queue lock in case any of the service // routines want to schedule new events. 
std::lock_guard lock(*eventq); From aee282b79f1efd556fb6fdda1165c0991f4457e6 Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Wed, 16 Nov 2022 15:26:33 -0800 Subject: [PATCH 179/492] tests: Update testing documentation This edits the documentation regarding the usage of the --isa tag, as this has fallen out of date in regards to the new 'ALL' isa. Change-Id: I3b672ac2c03dd109bba458db688af05ed4135a91 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65651 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- TESTING.md | 16 ++++++++++++---- tests/gem5/fixture.py | 3 +-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/TESTING.md b/TESTING.md index 2273e31ea7..146aeac8b1 100644 --- a/TESTING.md +++ b/TESTING.md @@ -86,10 +86,10 @@ For instance, if you want to run only with `gem5.opt`, you can use ./main.py run --variant opt ``` -Or, if you want to just run X86 tests with the `gem5.opt` binary: +Or, if you want to just run quick tests with the `gem5.opt` binary: ```shell -./main.py run --length quick --variant opt --isa X86 +./main.py run --length quick --variant opt ``` @@ -102,6 +102,14 @@ To view all of the available tags, use The output is split into tag *types* (e.g., isa, variant, length) and the tags for each type are listed after the type name. +Note that when using the isa tag type, tests were traditionally sorted based +on what compilation it required. However, as tests have switched to all be +compiled under the ALL compilation, which includes all ISAs so one doesn't +need to compile each one individually, using the isa tag for ISAs other than +ALL has become a less optimal way of searching for tests. It would instead +be better to run subsets of tests based on their directories, as described +above. + You can specify "or" between tags within the same type by using the tag flag multiple times. 
For instance, to run everything that is tagged "opt" or "fast" use @@ -112,10 +120,10 @@ use You can also specify "and" between different types of tags by specifying more than one type on the command line. For instance, this will only run tests with -both the "X86" and "opt" tags. +both the "ALL" and "opt" tags. ```shell -./main.py run --isa X86 --variant opt +./main.py run --isa All --variant opt ``` ## Running tests in batch diff --git a/tests/gem5/fixture.py b/tests/gem5/fixture.py index c8bc79ff64..6f5dd616ab 100644 --- a/tests/gem5/fixture.py +++ b/tests/gem5/fixture.py @@ -174,8 +174,7 @@ class SConsFixture(UniqueFixture): ) log.test_log.message("%s" % (", ".join(self.targets))) log.test_log.message( - "You may want to run with only a single ISA" - "(--isa=), use --skip-build, or use 'rerun'." + "You may want to use --skip-build, or use 'rerun'." ) command.extend(self.targets) From d1f76741c6df9a832b15b4c8a7e68d0397d0253c Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 2 Feb 2023 07:50:12 -0800 Subject: [PATCH 180/492] dev: Add a definition for VectorResetResponsePort. This is just a simple extension of the regular ResetResponsePort, and is useful if there is a collection of reset pins on a device. Change-Id: I6ccb21e949d3a51bf8b788ffd23e4b2b02706da9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67576 Tested-by: kokoro Reviewed-by: Yu-hsin Wang Maintainer: Gabe Black --- src/dev/ResetPort.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/dev/ResetPort.py b/src/dev/ResetPort.py index f35bc117c0..15caa476ec 100644 --- a/src/dev/ResetPort.py +++ b/src/dev/ResetPort.py @@ -42,8 +42,15 @@ class ResetResponsePort(Port): super().__init__(RESET_RESPONSE_ROLE, desc) -# VectorResetRequestPort presents a bank of artifact reset request +# VectorResetRequestPort represents a bank of artifact reset request # ports. 
class VectorResetRequestPort(VectorPort): def __init__(self, desc): super().__init__(RESET_REQUEST_ROLE, desc, is_source=True) + + +# VectorResetResponsePort represents a bank of artifact reset request +# ports. +class VectorResetResponsePort(VectorPort): + def __init__(self, desc): + super().__init__(RESET_RESPONSE_ROLE, desc) From bd9e126d5e8c7d4e37833c57d96e078f7c1c273c Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 2 Feb 2023 04:59:26 -0800 Subject: [PATCH 181/492] cpu: Add a generic model_reset port on the BaseCPU. This port will stop execution on the CPU when raised. When lowered, it will allow execution to reset the state of the CPU and allow execution to resume. The state could theoretically be reset when the reset state starts, but then it wouldn't reflect the most up to date condition of the CPU when resuming. For instance, if a reset vector was set somehow, that wouldn't be updated if it was changed while reset was asserted. The tradeoff is that the state won't look like it will when execution resumes while reset is held (to GDB for instance), but that seems like a more obvious and less common sort of problem. This signal is managed by the BaseCPU itself, but is backed by a virtual method which can be overridden by other CPU types which may not work the same way or have the same components. For instance, a fast model CPU could toggle reset lines on the underlying model and let it handle resetting all the state. The fast models in particular already have a generic reset line with the same name, but they have it at the level of the fast model which may have multiple cores within it, each represented by a gem5 CPU. It isn't implemented here, but there could be some sort of cooperation between these signals where the reset at the core level is considered an "or" of the cluster level reset and the individual core level resets. 
At least in the A76 model, there are resets for each individual core within the cluster as well, which the generic reset toggles. Another option would be to get rid of the whole cluster reset pin, and make the user gang the resets for each of the cores together to whatever reset signal they're using. That's effectively what the cluster level reset is doing, but within the C++ of the model wrapper instead of in the python config. Change-Id: Ie6b4769298ea224ec5dc88360cbb52ee8fbbf69c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67574 Tested-by: kokoro Reviewed-by: Roger Chang Maintainer: Gabe Black Reviewed-by: Yu-hsin Wang --- src/cpu/BaseCPU.py | 3 +++ src/cpu/base.cc | 41 +++++++++++++++++++++++++++++++++++++++++ src/cpu/base.hh | 16 ++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py index 438d4f45df..d77036a480 100644 --- a/src/cpu/BaseCPU.py +++ b/src/cpu/BaseCPU.py @@ -53,6 +53,7 @@ from m5.objects.CPUTracers import ExeTracer from m5.objects.SubSystem import SubSystem from m5.objects.ClockDomain import * from m5.objects.Platform import Platform +from m5.objects.ResetPort import ResetResponsePort default_tracer = ExeTracer() @@ -153,6 +154,8 @@ class BaseCPU(ClockedObject): "between CPU models)", ) + model_reset = ResetResponsePort("Generic reset for the CPU") + tracer = Param.InstTracer(default_tracer, "Instruction tracer") icache_port = RequestPort("Instruction Port") diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 98c53d4895..60d443af8c 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -47,6 +47,8 @@ #include #include +#include "arch/generic/decoder.hh" +#include "arch/generic/isa.hh" #include "arch/generic/tlb.hh" #include "base/cprintf.hh" #include "base/loader/symtab.hh" @@ -130,6 +132,7 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) _dataRequestorId(p.system->getRequestorId(this, "data")), _taskId(context_switch_task_id::Unknown), _pid(invldPid), 
_switchedOut(p.switched_out), _cacheLineSize(p.system->cacheLineSize()), + modelResetPort(p.name + ".model_reset"), interrupts(p.interrupts), numThreads(p.numThreads), system(p.system), previousCycle(0), previousState(CPU_STATE_SLEEP), functionTraceStream(nullptr), currentFunctionStart(0), @@ -178,6 +181,10 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) fatal("Number of ISAs (%i) assigned to the CPU does not equal number " "of threads (%i).\n", params().isa.size(), numThreads); } + + modelResetPort.onChange([this](const bool &new_val) { + setReset(new_val); + }); } void @@ -413,6 +420,8 @@ BaseCPU::getPort(const std::string &if_name, PortID idx) return getDataPort(); else if (if_name == "icache_port") return getInstPort(); + else if (if_name == "model_reset") + return modelResetPort; else return ClockedObject::getPort(if_name, idx); } @@ -479,6 +488,12 @@ BaseCPU::findContext(ThreadContext *tc) void BaseCPU::activateContext(ThreadID thread_num) { + if (modelResetPort.state()) { + DPRINTF(Thread, "CPU in reset, not activating context %d\n", + threadContexts[thread_num]->contextId()); + return; + } + DPRINTF(Thread, "activate contextId %d\n", threadContexts[thread_num]->contextId()); // Squash enter power gating event while cpu gets activated @@ -602,6 +617,32 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU) // we are switching to. getInstPort().takeOverFrom(&oldCPU->getInstPort()); getDataPort().takeOverFrom(&oldCPU->getDataPort()); + + // Switch over the reset line as well, if necessary. + if (oldCPU->modelResetPort.isConnected()) + modelResetPort.takeOverFrom(&oldCPU->modelResetPort); +} + +void +BaseCPU::setReset(bool state) +{ + for (auto tc: threadContexts) { + if (state) { + // As we enter reset, stop execution. 
+ tc->quiesce(); + } else { + // As we leave reset, first reset thread state, + tc->getIsaPtr()->resetThread(); + // reset the decoder in case it had partially decoded something, + tc->getDecoderPtr()->reset(); + // flush the TLBs, + tc->getMMUPtr()->flushAll(); + // Clear any interrupts, + interrupts[tc->threadId()]->clearAll(); + // and finally reenable execution. + tc->activate(); + } + } } void diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 0d56fbad89..084d9b9305 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -55,6 +55,7 @@ #include "sim/insttracer.hh" #include "sim/probe/pmu.hh" #include "sim/probe/probe.hh" +#include "sim/signal.hh" #include "sim/system.hh" namespace gem5 @@ -161,6 +162,8 @@ class BaseCPU : public ClockedObject * group. */ static std::unique_ptr globalStats; + SignalSinkPort modelResetPort; + public: /** @@ -337,6 +340,19 @@ class BaseCPU : public ClockedObject */ virtual void takeOverFrom(BaseCPU *cpu); + /** + * Set the reset of the CPU to be either asserted or deasserted. + * + * When asserted, the CPU should be stopped and waiting. When deasserted, + * the CPU should start running again, unless some other condition would + * also prevent it. At the point the reset is deasserted, it should be + * reinitialized as defined by the ISA it's running and any other relevant + * part of its configuration (reset address, etc). + * + * @param state The new state of the reset signal to this CPU. + */ + virtual void setReset(bool state); + /** * Flush all TLBs in the CPU. * From 89c49d1ab06ea5364ab1f80586f8b01c0297cb12 Mon Sep 17 00:00:00 2001 From: zhongchengyong Date: Tue, 7 Feb 2023 22:21:53 +0800 Subject: [PATCH 182/492] arch-riscv: Fix the CSR instruction behavior. The RISC-V spec clarifies the CSR instruction operation, some of them shall not read or write CSR by the hints of RD/RS1/uimm, but the original version use the 'data != oldData' condition to determine whether write or not, and always read CSR first. 
See CSR instruction in spec: Section 9.1 Page 56 of https://github.com/riscv/riscv-isa-manual/releases/download/Ratified-IMAFDQC/riscv-spec-20191213.pdf Change-Id: I5e7a43cf639474ae76c19a1f430d314b4634ce62 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67717 Reviewed-by: Hoa Nguyen Tested-by: kokoro Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power --- src/arch/riscv/insts/standard.hh | 19 +++++++++++++++++-- src/arch/riscv/isa/formats/standard.isa | 12 +++++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/arch/riscv/insts/standard.hh b/src/arch/riscv/insts/standard.hh index 5b0e8c2c22..afcfd7a915 100644 --- a/src/arch/riscv/insts/standard.hh +++ b/src/arch/riscv/insts/standard.hh @@ -91,18 +91,33 @@ class CSROp : public RiscvStaticInst protected: uint64_t csr; uint64_t uimm; + bool read; + bool write; /// Constructor CSROp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : RiscvStaticInst(mnem, _machInst, __opClass), - csr(FUNCT12), uimm(CSRIMM) + csr(FUNCT12), uimm(CSRIMM), read(true), write(true) { if (csr == CSR_SATP) { flags[IsSquashAfter] = true; } + if (strcmp(mnemonic, "csrrw") == 0 || + strcmp(mnemonic, "csrrwi") == 0) { + if (RD == 0){ + read = false; + } + } else if (strcmp(mnemonic, "csrrs") == 0 || + strcmp(mnemonic, "csrrc") == 0 || + strcmp(mnemonic, "csrrsi") == 0 || + strcmp(mnemonic, "csrrci") == 0 ){ + if (RS1 == 0) { + write = false; + } + } } - std::string generateDisassembly( + std::string generateDisassembly( Addr pc, const loader::SymbolTable *symtab) const override; }; diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa index bb500f5f49..1bd431ac4d 100644 --- a/src/arch/riscv/isa/formats/standard.isa +++ b/src/arch/riscv/isa/formats/standard.isa @@ -358,7 +358,7 @@ def template CSRExecute {{ %(op_decl)s; %(op_rd)s; - RegVal data, olddata; + RegVal data = 0, olddata = 0; auto lowestAllowedMode = (PrivilegeMode)bits(csr, 9, 8); auto pm 
= (PrivilegeMode)xc->readMiscReg(MISCREG_PRV); if (pm < lowestAllowedMode) { @@ -380,11 +380,13 @@ def template CSRExecute {{ break; } - if (csr == CSR_FCSR) { + if (read) { + if (csr == CSR_FCSR) { olddata = xc->readMiscReg(MISCREG_FFLAGS) | - (xc->readMiscReg(MISCREG_FRM) << FRM_OFFSET); - } else { + (xc->readMiscReg(MISCREG_FRM) << FRM_OFFSET); + } else { olddata = xc->readMiscReg(midx); + } } olddata = rvZext(olddata); auto olddata_all = olddata; @@ -396,7 +398,7 @@ def template CSRExecute {{ %(code)s; data &= maskVal; - if (data != olddata) { + if (write) { if (bits(csr, 11, 10) == 0x3) { return std::make_shared( csprintf("CSR %s is read-only\n", csrName), machInst); From 905b8ebd2235b730840d5392f605b5cf5de2840f Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Tue, 3 Jan 2023 08:08:05 -0800 Subject: [PATCH 183/492] arch-vega: Implement ds_write_b8_d16_hi Writes a byte to the upper 16-bit input word to an address. Change-Id: I0bfd573526b9c46585d0008cde07c769b1d29ebd Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67411 Maintainer: Matt Sinclair Reviewed-by: Matt Sinclair Tested-by: kokoro --- src/arch/amdgpu/vega/decoder.cc | 3 +- src/arch/amdgpu/vega/insts/instructions.cc | 62 ++++++++++++++++++++++ src/arch/amdgpu/vega/insts/instructions.hh | 34 ++++++++++++ 3 files changed, 97 insertions(+), 2 deletions(-) diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/decoder.cc index 18c72a4382..291dd6924a 100644 --- a/src/arch/amdgpu/vega/decoder.cc +++ b/src/arch/amdgpu/vega/decoder.cc @@ -7706,8 +7706,7 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_DS__DS_WRITE_B8_D16_HI(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_DS__DS_WRITE_B8_D16_HI(&iFmt->iFmt_DS); } GPUStaticInst* diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index 6cf01fb8f9..f019dfd75e 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc 
+++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -34877,6 +34877,68 @@ namespace VegaISA Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst) { } // completeAcc + // --- Inst_DS__DS_WRITE_B8_D16_HI class methods --- + + Inst_DS__DS_WRITE_B8_D16_HI::Inst_DS__DS_WRITE_B8_D16_HI(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b8_d16_hi") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B8_D16_HI + + Inst_DS__DS_WRITE_B8_D16_HI::~Inst_DS__DS_WRITE_B8_D16_HI() + { + } // ~Inst_DS__DS_WRITE_B8_D16_HI + + // --- description from .arch file --- + // MEM[ADDR] = DATA[23:16]. + // Byte write in to high word. + void + Inst_DS__DS_WRITE_B8_D16_HI::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU8 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = bits(data[lane], 23, 16); + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B8_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B8_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc // --- Inst_DS__DS_WRITE_B16 class methods --- Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh 
b/src/arch/amdgpu/vega/insts/instructions.hh index 289673232b..dc2ee08f08 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -31934,6 +31934,40 @@ namespace VegaISA void completeAcc(GPUDynInstPtr) override; }; // Inst_DS__DS_WRITE_B8 + class Inst_DS__DS_WRITE_B8_D16_HI : public Inst_DS + { + public: + Inst_DS__DS_WRITE_B8_D16_HI(InFmt_DS*); + ~Inst_DS__DS_WRITE_B8_D16_HI(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 0; } + int numSrcRegOperands() override { return 2; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_a + return 4; + case 1: //vgpr_d0 + return 1; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_DS__DS_WRITE_B8_D16_HI + class Inst_DS__DS_WRITE_B16 : public Inst_DS { public: From bc9e90d65e3c2813d2eed70b45abd7c62702851c Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Tue, 3 Jan 2023 11:40:45 -0800 Subject: [PATCH 184/492] arch-vega: Make VGPR-offset for global SGPR-base signed The VGPR-offset used when SGPR-base addressing is used can be signed in Vega. These are global instructions of the format: `global_load_dword v0, v1, s[0:1]`. This is not explicitly stated in the ISA manual however based on compiler output the offset can be negative. This changeset assigns the offset to a signed 32-bit integer and the compiler takes care of the signedness in the expression which calculates the final address. This fixes a bad address calculation in a rocPRIM unit test. 
Change-Id: I271edfbb4c6344cb1a6a69a0fd3df58a6198d599 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67412 Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce Tested-by: kokoro --- src/arch/amdgpu/vega/insts/op_encodings.hh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh index 34f6040495..1071eada0e 100644 --- a/src/arch/amdgpu/vega/insts/op_encodings.hh +++ b/src/arch/amdgpu/vega/insts/op_encodings.hh @@ -1007,8 +1007,9 @@ namespace VegaISA // mask any upper bits from the vaddr. for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { + ScalarRegI32 voffset = vaddr[lane]; gpuDynInst->addr.at(lane) = - saddr.rawData() + (vaddr[lane] & 0xffffffff) + offset; + saddr.rawData() + voffset + offset; } } } From 39e813374cab3d9a04e84b4474997b7a6c71ed45 Mon Sep 17 00:00:00 2001 From: Zhengrong Wang Date: Wed, 8 Feb 2023 20:41:26 -0800 Subject: [PATCH 185/492] ext: Fix typo in DRAMSIM2 Sconscript ClockDoenv should be ClockDomain. 
Change-Id: Ibcf3d0dc969624a4e20d86924ef834781b5bbf21 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67759 Tested-by: kokoro Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power --- ext/dramsim2/SConscript | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/dramsim2/SConscript b/ext/dramsim2/SConscript index 95b999dc4c..7eb178d626 100644 --- a/ext/dramsim2/SConscript +++ b/ext/dramsim2/SConscript @@ -59,7 +59,7 @@ DRAMFile('AddressMapping.cpp') DRAMFile('Bank.cpp') DRAMFile('BankState.cpp') DRAMFile('BusPacket.cpp') -DRAMFile('ClockDoenv.cpp') +DRAMFile('ClockDomain.cpp') DRAMFile('CommandQueue.cpp') DRAMFile('IniReader.cpp') DRAMFile('MemoryController.cpp') From b6a591e20385f7ea2f08b1a349620cd802f30cd4 Mon Sep 17 00:00:00 2001 From: Zhengrong Wang Date: Fri, 10 Feb 2023 22:34:28 -0800 Subject: [PATCH 186/492] mem-dram: Make sure SHOW_SIM_OUTPUT is in global namespace. As stated in the comment, SHOW_SIM_OUTPUT is declared extern in the DRAMSim2 print macros. Therefore, it should be defined in the global namespace, not in gem5 namespace. 
Change-Id: I05245a48ac706b46085ffa8d00db3725ce16a89e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67859 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/mem/dramsim2_wrapper.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mem/dramsim2_wrapper.cc b/src/mem/dramsim2_wrapper.cc index c622c1cb57..b9bcf14c08 100644 --- a/src/mem/dramsim2_wrapper.cc +++ b/src/mem/dramsim2_wrapper.cc @@ -54,12 +54,6 @@ #include "base/compiler.hh" #include "base/logging.hh" -namespace gem5 -{ - -namespace memory -{ - /** * DRAMSim2 requires SHOW_SIM_OUTPUT to be defined (declared extern in * the DRAMSim2 print macros), otherwise we get linking errors due to @@ -67,6 +61,12 @@ namespace memory */ int SHOW_SIM_OUTPUT = 0; +namespace gem5 +{ + +namespace memory +{ + DRAMSim2Wrapper::DRAMSim2Wrapper(const std::string& config_file, const std::string& system_file, const std::string& working_dir, From 39b5b5e5113579ec49e81b14124e97bc8e7e1a6b Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Mon, 13 Feb 2023 10:58:12 -0600 Subject: [PATCH 187/492] dev-amdgpu: Fix address in POLL_REGMEM SDMA packet The address for the POLL_REGMEM packet should not be shifted when the mode is 1 (memory). Relevant driver code below is not shifting the address. The shift is causing a page fault due to the incorrect address. This changeset removes the shift so the correct address is translated. 
https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/ roc-4.3.x/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c#L903 Change-Id: I7a0ec3245ca14376670df24c5d3773958c08d751 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67877 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- src/dev/amdgpu/sdma_engine.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc index 4c03bf57b2..736df45d9d 100644 --- a/src/dev/amdgpu/sdma_engine.cc +++ b/src/dev/amdgpu/sdma_engine.cc @@ -832,7 +832,7 @@ SDMAEngine::pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header, auto cb = new DmaVirtCallback( [ = ] (const uint32_t &dma_buffer) { pollRegMemRead(q, header, pkt, dma_buffer, 0); }); - dmaReadVirt(pkt->address >> 3, sizeof(uint32_t), cb, + dmaReadVirt(pkt->address, sizeof(uint32_t), cb, (void *)&cb->dmaBuffer); } else { panic("SDMA poll mem operation not implemented."); From d7516a26dc004892ce03c9784222da5944ea2489 Mon Sep 17 00:00:00 2001 From: Alexandru Dutu Date: Fri, 7 Oct 2022 17:11:35 -0700 Subject: [PATCH 188/492] arch-vega: Implementing global_atomic_or Change-Id: I13065186313ca784054956e1165b1b2fd8ce4a19 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64511 Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/arch/amdgpu/vega/insts/instructions.cc | 54 +++++++++++++++++++++- src/arch/amdgpu/vega/insts/instructions.hh | 2 + 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index f019dfd75e..987474fbfb 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -45112,8 +45112,60 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + 
wf->decVMemInstsIssued(); + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->globalMemoryPipe. + issueRequest(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_OR::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_OR::completeAcc(GPUDynInstPtr gpuDynInst) + { + if (isAtomicRet()) { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_XOR class methods --- Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index dc2ee08f08..ddf228a76a 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -42800,6 +42800,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_OR class Inst_FLAT__FLAT_ATOMIC_XOR : public Inst_FLAT From 8375058e73dfeefe433f89c9fe00e675d9ad095a Mon Sep 17 00:00:00 2001 From: Alexandru Dutu Date: Fri, 7 Oct 2022 17:22:12 -0700 Subject: [PATCH 189/492] arch-vega: 
Implementing global_atomic_smin Change-Id: Iffb366190f9e3f7ffbacde5dbb3abc97226926d4 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64512 Reviewed-by: Matt Sinclair Tested-by: kokoro Maintainer: Matt Sinclair --- src/arch/amdgpu/vega/insts/instructions.cc | 53 +++++++++++++++++++++- src/arch/amdgpu/vega/insts/instructions.hh | 2 + 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index 987474fbfb..e3639a5901 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -44972,8 +44972,59 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->globalMemoryPipe. 
+ issueRequest(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SMIN::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SMIN::completeAcc(GPUDynInstPtr gpuDynInst) + { + if (isAtomicRet()) { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_UMIN class methods --- Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index ddf228a76a..8b0c8c43de 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -42615,6 +42615,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_SMIN class Inst_FLAT__FLAT_ATOMIC_UMIN : public Inst_FLAT From bb8f370e4d36ada08afa82e51d4b3b934bf105c7 Mon Sep 17 00:00:00 2001 From: Alexandru Dutu Date: Fri, 7 Oct 2022 17:33:50 -0700 Subject: [PATCH 190/492] arch-vega: Implementing global_atomic_smax Change-Id: Id4053424c98eec1e98eb555bb35b48f0b5d2407b Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64513 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- src/arch/amdgpu/vega/insts/instructions.cc | 53 +++++++++++++++++++++- src/arch/amdgpu/vega/insts/instructions.hh | 2 + 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index e3639a5901..b6a78b26e4 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -45079,8 +45079,59 
@@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->globalMemoryPipe. + issueRequest(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SMAX::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SMAX::completeAcc(GPUDynInstPtr gpuDynInst) + { + if (isAtomicRet()) { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_UMAX class methods --- Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index 8b0c8c43de..d45a84c7b8 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -42691,6 +42691,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_SMAX class 
Inst_FLAT__FLAT_ATOMIC_UMAX : public Inst_FLAT From ea9239ae092a919a0505ba15aef1595bb0ceeb49 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Thu, 9 Feb 2023 12:03:38 -0600 Subject: [PATCH 191/492] dev-amdgpu: Update deprecated ports Change-Id: Icbc5636c33b437c7396ee27363eed1cf006f8882 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67837 Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/arch/amdgpu/common/tlb_coalescer.hh | 2 +- src/dev/amdgpu/memory_manager.hh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arch/amdgpu/common/tlb_coalescer.hh b/src/arch/amdgpu/common/tlb_coalescer.hh index 59d8ebe888..56d72d7abb 100644 --- a/src/arch/amdgpu/common/tlb_coalescer.hh +++ b/src/arch/amdgpu/common/tlb_coalescer.hh @@ -152,7 +152,7 @@ class TLBCoalescer : public ClockedObject public: MemSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer, PortID _index) - : RequestPort(_name, tlb_coalescer), coalescer(tlb_coalescer), + : RequestPort(_name), coalescer(tlb_coalescer), index(_index) { } std::deque retries; diff --git a/src/dev/amdgpu/memory_manager.hh b/src/dev/amdgpu/memory_manager.hh index e18ec643a6..0bd08d6ff9 100644 --- a/src/dev/amdgpu/memory_manager.hh +++ b/src/dev/amdgpu/memory_manager.hh @@ -45,11 +45,11 @@ namespace gem5 class AMDGPUMemoryManager : public ClockedObject { - class GPUMemPort : public MasterPort + class GPUMemPort : public RequestPort { public: GPUMemPort(const std::string &_name, AMDGPUMemoryManager &_gpuMemMgr) - : MasterPort(_name, &_gpuMemMgr), gpu_mem(_gpuMemMgr) + : RequestPort(_name), gpu_mem(_gpuMemMgr) { } From e10be09dcf919d50f03547924dde0157692cc8f8 Mon Sep 17 00:00:00 2001 From: hungweihsu Date: Thu, 9 Feb 2023 05:39:15 +0000 Subject: [PATCH 192/492] dev: add method to set initial register value out of constructor. 
The initial value of register is set in constructor but there is no standard way to assign the initial value and default value at the same time out of that. So we decided to add an extra method to set the initialValue to current register value. The usecase would be: reg.get().field1 = val1; reg.get().field2 = val2; reg.resetInitialValue(); Change-Id: Ibc5454e2945cc6aff943e6599043edd8ca442f5f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67917 Tested-by: kokoro Reviewed-by: Gabe Black Maintainer: Gabe Black --- src/dev/reg_bank.hh | 3 +++ src/dev/reg_bank.test.cc | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/src/dev/reg_bank.hh b/src/dev/reg_bank.hh index 3d8dc576cb..3a89a00ab6 100644 --- a/src/dev/reg_bank.hh +++ b/src/dev/reg_bank.hh @@ -759,6 +759,9 @@ class RegisterBank : public RegisterBankBase // constructor. This is intended to be used in a resetter function. const Data &initialValue() const { return _resetData; } + // Reset the initial value, which is normally set in the constructor, + // to the register's current value. 
+ void resetInitialValue() { _resetData = _data; } /* * Interface for accessing the register's state, for use by the diff --git a/src/dev/reg_bank.test.cc b/src/dev/reg_bank.test.cc index 4439526e35..c618ef16d4 100644 --- a/src/dev/reg_bank.test.cc +++ b/src/dev/reg_bank.test.cc @@ -881,6 +881,28 @@ TEST_F(TypedRegisterTest, DefaultResetter) EXPECT_EQ(reg.get(), initial_value); } +// Set initial value later than constructor +TEST_F(TypedRegisterTest, LateInitialValueAssignment) +{ + BackingType initial_value = reg.get(); + BackingType new_initial_value = initial_value + 1; + + reg.get() = new_initial_value; + reg.resetInitialValue(); + + EXPECT_EQ(reg.get(), new_initial_value); + EXPECT_EQ(reg.initialValue(), new_initial_value); + + reg.get() = new_initial_value + 1; + EXPECT_EQ(reg.get(), new_initial_value + 1); + EXPECT_EQ(reg.initialValue(), new_initial_value); + + reg.reset(); + + EXPECT_EQ(reg.get(), new_initial_value); + EXPECT_EQ(reg.initialValue(), new_initial_value); +} + // Set a custom resetter for a register. TEST_F(TypedRegisterTest, Resetter) { From 4b1c24542065380c6cff7ab2baa25e216a0ad38e Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Tue, 14 Feb 2023 10:26:09 +0800 Subject: [PATCH 193/492] arch-riscv: Fix the behavior of write to status CSR According to RISC V spec Volume I, Section 11.1, the CSR will be written only if RS1 != 0 or imm != 0. However, after the change of CL(https://gem5-review.googlesource.com/c/public/gem5/+/67717), it will cause IllegalInstFault to write status CSR if we don't change the data. Example of Instruction Fault for mstatus ``` addi a5, zero, 8 csrc mstatus, a5 ``` It will cause instruction fault if mstatus value is 0 due to "newdata_all == olddata_all". We can just simply check if the data value is changed out of mask. 
Change-Id: Iab4ce7ac646a9105dc04e69c24d084572e28ebab Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67897 Reviewed-by: Yu-hsin Wang Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/arch/riscv/insts/standard.hh | 2 +- src/arch/riscv/isa/formats/standard.isa | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/arch/riscv/insts/standard.hh b/src/arch/riscv/insts/standard.hh index afcfd7a915..2dfe73aedf 100644 --- a/src/arch/riscv/insts/standard.hh +++ b/src/arch/riscv/insts/standard.hh @@ -111,7 +111,7 @@ class CSROp : public RiscvStaticInst strcmp(mnemonic, "csrrc") == 0 || strcmp(mnemonic, "csrrsi") == 0 || strcmp(mnemonic, "csrrci") == 0 ){ - if (RS1 == 0) { + if (RS1 == 0 || uimm == 0) { write = false; } } diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa index 1bd431ac4d..c94a0bcdbd 100644 --- a/src/arch/riscv/isa/formats/standard.isa +++ b/src/arch/riscv/isa/formats/standard.isa @@ -358,7 +358,7 @@ def template CSRExecute {{ %(op_decl)s; %(op_rd)s; - RegVal data = 0, olddata = 0; + RegVal data = 0, olddata = 0, nonmaskdata = 0; auto lowestAllowedMode = (PrivilegeMode)bits(csr, 9, 8); auto pm = (PrivilegeMode)xc->readMiscReg(MISCREG_PRV); if (pm < lowestAllowedMode) { @@ -397,6 +397,7 @@ def template CSRExecute {{ %(code)s; + nonmaskdata = data & ~maskVal; data &= maskVal; if (write) { if (bits(csr, 11, 10) == 0x3) { @@ -419,7 +420,7 @@ def template CSRExecute {{ case CSR_SIP: case CSR_SIE: case CSR_UIP: case CSR_UIE: case CSR_MSTATUS: case CSR_SSTATUS: case CSR_USTATUS: - if (newdata_all != olddata_all) { + if (nonmaskdata == 0) { xc->setMiscReg(midx, newdata_all); } else { return std::make_shared( From 3b4f241fb5a05f29e2235d0dea33475ebbbc3185 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Tue, 17 Jan 2023 10:38:11 +0800 Subject: [PATCH 194/492] arch-riscv: Fix incorrect trap value of instruction fault As we add rv_type bit in machInst at 62, It will get the machine 
code with rv_type specification if we just return machInst. We only need return machine code for handling instruction fault. Change-Id: I9dd7a25047d4a13df5b47dc9e422345ba44b7b09 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67677 Reviewed-by: Yu-hsin Wang Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/arch/riscv/faults.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/riscv/faults.hh b/src/arch/riscv/faults.hh index e66476727c..f687fd6f20 100644 --- a/src/arch/riscv/faults.hh +++ b/src/arch/riscv/faults.hh @@ -173,7 +173,7 @@ class InstFault : public RiscvFault : RiscvFault(n, FaultType::OTHERS, INST_ILLEGAL), _inst(inst) {} - RegVal trap_value() const override { return _inst; } + RegVal trap_value() const override { return bits(_inst, 31, 0); } }; class UnknownInstFault : public InstFault From f028bd55e0ef74c350d0aa75e1523e7139c4f207 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 15 Feb 2023 07:45:57 -0800 Subject: [PATCH 195/492] arch-vega: Update API for some flat atomics Some recently submitted atomic instructions were using two older APIs. Update these to use the newer APIs to support all apertures and avoid compilation issue. 
Change-Id: Ibd6bc00177d33236946f54ef8e5c7544af322852 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67977 Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/arch/amdgpu/vega/insts/instructions.cc | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index b6a78b26e4..45c84910f2 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -44984,13 +44984,11 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU32 data(gpuDynInst, extData.DATA); - addr.read(); data.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -44999,8 +44997,7 @@ namespace VegaISA } } - gpuDynInst->computeUnit()->globalMemoryPipe. - issueRequest(gpuDynInst); + issueRequestHelper(gpuDynInst); } // execute void @@ -45091,13 +45088,11 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU32 data(gpuDynInst, extData.DATA); - addr.read(); data.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -45106,8 +45101,7 @@ namespace VegaISA } } - gpuDynInst->computeUnit()->globalMemoryPipe. 
- issueRequest(gpuDynInst); + issueRequestHelper(gpuDynInst); } // execute void @@ -45226,13 +45220,11 @@ namespace VegaISA gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU32 data(gpuDynInst, extData.DATA); - addr.read(); data.read(); - calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -45241,8 +45233,7 @@ namespace VegaISA } } - gpuDynInst->computeUnit()->globalMemoryPipe. - issueRequest(gpuDynInst); + issueRequestHelper(gpuDynInst); } // execute void From 109c327209de4e6f5a7f621f59e333f61530cb66 Mon Sep 17 00:00:00 2001 From: Yan Lee Date: Tue, 14 Feb 2023 19:47:38 -0800 Subject: [PATCH 196/492] base: add extensible type Extensible is for carrying additional user-defined information. Each type of the extension will have a unique extension ID and there is a linked list of extensions in every Extensible object. There will be at most one extension of the same type in the linked list. With the shared_ptr, the extension will be deleted automatically. That is, the caller should allocate the extension and add it into the packet. 
Change-Id: I54729536a305c91c751d5fb059bd2f9a3db05523 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/62892 Tested-by: kokoro Reviewed-by: Giacomo Travaglini Maintainer: Giacomo Travaglini --- src/base/SConscript | 1 + src/base/extensible.hh | 187 ++++++++++++++++++++++++++++++++++++ src/base/extensible.test.cc | 111 +++++++++++++++++++++ 3 files changed, 299 insertions(+) create mode 100644 src/base/extensible.hh create mode 100644 src/base/extensible.test.cc diff --git a/src/base/SConscript b/src/base/SConscript index 4a6b65fa72..29f106a0b2 100644 --- a/src/base/SConscript +++ b/src/base/SConscript @@ -90,6 +90,7 @@ GTest('bitunion.test', 'bitunion.test.cc') GTest('channel_addr.test', 'channel_addr.test.cc', 'channel_addr.cc') GTest('circlebuf.test', 'circlebuf.test.cc') GTest('circular_queue.test', 'circular_queue.test.cc') +GTest('extensible.test', 'extensible.test.cc') GTest('sat_counter.test', 'sat_counter.test.cc') GTest('refcnt.test','refcnt.test.cc') GTest('condcodes.test', 'condcodes.test.cc') diff --git a/src/base/extensible.hh b/src/base/extensible.hh new file mode 100644 index 0000000000..eb79c71be3 --- /dev/null +++ b/src/base/extensible.hh @@ -0,0 +1,187 @@ +/* + * Copyright 2023 Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* @file + * Extensible Object Base Class Declaration + */ + +#ifndef __BASE_EXTENSIBLE_HH__ +#define __BASE_EXTENSIBLE_HH__ + +#include +#include +#include + +namespace gem5 +{ + +/** + * This is base of every extension. + */ +class ExtensionBase +{ + public: + explicit ExtensionBase(const unsigned int id) + : extID(id) {} + + virtual ~ExtensionBase() = default; + + virtual std::unique_ptr clone() const = 0; + + static unsigned int + maxNumExtensions() + { + static unsigned int max_num = 0; + return ++max_num; + } + + unsigned int getExtensionID() const { return extID; } + + private: + const unsigned int extID; +}; + +/** + * This is the extension for carrying additional information. + * Each type of extension will have a unique extensionID. + * This extensionID will assign to base class for comparsion. 
+ */ +template +class Extension : public ExtensionBase +{ + public: + Extension() : ExtensionBase(extensionID) {} + + const static unsigned int extensionID; +}; + +template +const unsigned int Extension::extensionID = + ExtensionBase::maxNumExtensions() - 1; + +template +class Extensible +{ + public: + Extensible() = default; + Extensible(const Extensible& other) + { + // Clone every extension from other. + for (auto& ext : other.extensions) { + extensions.emplace_back(ext->clone()); + } + } + virtual ~Extensible() = default; + + /** + * Set a new extension to the packet and replace the old one, if there + * already exists the same type of extension in this packet. This new + * extension will be deleted automatically with the shared_ptr<>. + * + * @param ext Extension to set + */ + template + void + setExtension(std::shared_ptr ext) + { + static_assert(std::is_base_of::value, + "Extension should inherit from ExtensionBase."); + assert(ext.get() != nullptr); + + auto it = findExtension(); + + if (it != extensions.end()) { + // There exists the same type of extension in the list. + // Replace it to the new one. + *it = std::move(ext); + } else { + // Add ext into the linked list. + extensions.emplace_back(std::move(ext)); + } + } + + /** + * Remove the extension based on its type. + * + * @param ext Extension to remove + */ + template + void + removeExtension(void) + { + static_assert(std::is_base_of::value, + "Extension should inherit from ExtensionBase."); + + auto it = findExtension(); + if (it != extensions.end()) + extensions.erase(it); + } + + /** + * Get the extension pointer by linear search with the extensionID. 
+ */ + template + std::shared_ptr + getExtension() + { + static_assert(std::is_base_of::value, + "Extension should inherit from ExtensionBase."); + auto it = findExtension(); + if (it == extensions.end()) + return nullptr; + return std::static_pointer_cast(*it); + } + + protected: + + /** + * Go through the extension list and return the iterator to the instance of + * the type of extension. If there is no such an extension, return the end + * iterator of the list. + * + * @return The iterator to the extension type T if there exists. + */ + template + std::list>::iterator + findExtension() + { + auto it = extensions.begin(); + while (it != extensions.end()) { + if ((*it)->getExtensionID() == T::extensionID) + break; + it++; + } + return it; + } + + // Linked list of extensions. + std::list> extensions; +}; + +} // namespace gem5 + +#endif //__BASE_EXTENSIBLE_HH__ diff --git a/src/base/extensible.test.cc b/src/base/extensible.test.cc new file mode 100644 index 0000000000..66cbbda527 --- /dev/null +++ b/src/base/extensible.test.cc @@ -0,0 +1,111 @@ +/* + * Copyright 2023 Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include + +#include "base/extensible.hh" + +using namespace gem5; + +namespace { + +class TestTarget : public Extensible +{ +}; + +class IntegerExtension : public Extension +{ + public: + explicit IntegerExtension(uint32_t data) + : data_(data) {} + + std::unique_ptr clone() const override + { + return std::unique_ptr(new IntegerExtension(data_)); + } + + uint32_t getData() const { return data_; } + + private: + uint32_t data_; +}; + +class BoolExtension : public Extension +{ + public: + explicit BoolExtension(bool data) + : data_(data) {} + + std::unique_ptr clone() const override + { + return std::unique_ptr(new BoolExtension(data_)); + } + + bool getData() const { return data_; } + + private: + bool data_; +}; + +} // namespace + +TEST(ExtensibleTest, ExtensionID) +{ + std::shared_ptr ext1(new IntegerExtension(0xabcd)); + EXPECT_EQ(0, ext1->getExtensionID()); + + std::shared_ptr ext2(new BoolExtension(true)); + EXPECT_EQ(1, ext2->getExtensionID()); +} + +TEST(ExtensibleTest, SetAndRemoveExtension) +{ + const uint32_t data = 0xbeef; + std::shared_ptr ext(new IntegerExtension(data)); + std::unique_ptr target(new TestTarget); + target->setExtension(ext); + 
EXPECT_EQ(data, target->getExtension()->getData()); + + target->removeExtension(); + EXPECT_EQ(nullptr, target->getExtension()); +} + +TEST(ExtensibleTest, ReplaceExtension) +{ + const uint32_t data = 0xbeef; + std::shared_ptr ext(new IntegerExtension(data)); + std::unique_ptr target(new TestTarget); + target->setExtension(ext); + EXPECT_EQ(data, target->getExtension()->getData()); + + const uint32_t new_data = 0xa5a5; + std::shared_ptr new_ext(new IntegerExtension(new_data)); + target->setExtension(new_ext); + EXPECT_EQ(new_data, target->getExtension()->getData()); +} From df0bed6858a4b78c1148337695a07e2aeb4125af Mon Sep 17 00:00:00 2001 From: Nikos Nikoleris Date: Thu, 9 Feb 2023 09:33:17 +0000 Subject: [PATCH 197/492] python: Ensure that m5.internal.params is available Add an import to m5.internal.params which became necessary after: 95f9017c2e configs,python: Clean some cruft out of m5.objects. This import is necessary but also causes problems when scons calls build_tools/sim_object_param_struct_hh.py to generate params/SimObject.hh. m5.internal.params itself imports _m5 and _m5 is unavailable resulting in an ImportError. This is benign and we can safely ignore it. Change-Id: I3809e81284e730fb9c9e0e7e91bd61b801d73f90 Signed-off-by: Nikos Nikoleris Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67797 Maintainer: Giacomo Travaglini Tested-by: kokoro Reviewed-by: Giacomo Travaglini --- src/python/m5/SimObject.py | 3 +++ src/python/m5/internal/params.py | 17 +++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py index b5dfca9752..6caa532897 100644 --- a/src/python/m5/SimObject.py +++ b/src/python/m5/SimObject.py @@ -445,6 +445,9 @@ class MetaSimObject(type): return cls.__name__ def getCCClass(cls): + # Ensure that m5.internal.params is available. 
+ import m5.internal.params + return getattr(m5.internal.params, cls.pybind_class) # See ParamValue.cxx_predecls for description. diff --git a/src/python/m5/internal/params.py b/src/python/m5/internal/params.py index 8762a69e61..8225d0b059 100644 --- a/src/python/m5/internal/params.py +++ b/src/python/m5/internal/params.py @@ -37,8 +37,17 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import inspect -import _m5 -for name, module in inspect.getmembers(_m5): - if name.startswith("param_") or name.startswith("enum_"): - exec("from _m5.%s import *" % name) +try: + # Avoid ImportErrors at build time when _m5 is not available + import _m5 + + in_gem5 = True +except ImportError: + # The import failed, we're being called from the build system + in_gem5 = False + +if in_gem5: + for name, module in inspect.getmembers(_m5): + if name.startswith("param_") or name.startswith("enum_"): + exec("from _m5.%s import *" % name) From c913c098a6a6d5afb9dc3911f2fc953510fe328a Mon Sep 17 00:00:00 2001 From: Yan Lee Date: Tue, 14 Feb 2023 19:50:10 -0800 Subject: [PATCH 198/492] mem: add extension mechanism into Packet Change-Id: Ieda941f73078d98ad7896a376d95dd1573c938e6 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67957 Reviewed-by: Yu-hsin Wang Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/mem/packet.hh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 9d720fb9a0..ed7a94f4fb 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -55,6 +55,7 @@ #include "base/addr_range.hh" #include "base/cast.hh" #include "base/compiler.hh" +#include "base/extensible.hh" #include "base/flags.hh" #include "base/logging.hh" #include "base/printable.hh" @@ -290,7 +291,7 @@ class MemCmd * ultimate destination and back, possibly being conveyed by several * different Packets along the way.) 
*/ -class Packet : public Printable +class Packet : public Printable, public Extensible { public: typedef uint32_t FlagsType; @@ -941,7 +942,8 @@ class Packet : public Printable * packet should allocate its own data. */ Packet(const PacketPtr pkt, bool clear_flags, bool alloc_data) - : cmd(pkt->cmd), id(pkt->id), req(pkt->req), + : Extensible(*pkt), + cmd(pkt->cmd), id(pkt->id), req(pkt->req), data(nullptr), addr(pkt->addr), _isSecure(pkt->_isSecure), size(pkt->size), bytesValid(pkt->bytesValid), From 4c9253761ff6ea4bbd4b207d6dca79eedcab903e Mon Sep 17 00:00:00 2001 From: Yan Lee Date: Tue, 14 Feb 2023 19:50:49 -0800 Subject: [PATCH 199/492] mem: add extension mechanism into Request Change-Id: Ie144e0cf243bab6d9ddbea79caf559c7e774a787 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67958 Maintainer: Jason Lowe-Power Reviewed-by: Yu-hsin Wang Tested-by: kokoro Reviewed-by: Jason Lowe-Power --- src/mem/request.hh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mem/request.hh b/src/mem/request.hh index be91c71cc0..491aad0241 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -58,6 +58,7 @@ #include "base/amo.hh" #include "base/compiler.hh" +#include "base/extensible.hh" #include "base/flags.hh" #include "base/types.hh" #include "cpu/inst_seq.hh" @@ -93,7 +94,7 @@ class ThreadContext; typedef std::shared_ptr RequestPtr; typedef uint16_t RequestorID; -class Request +class Request : public Extensible { public: typedef uint64_t FlagsType; @@ -501,7 +502,8 @@ class Request } Request(const Request& other) - : _paddr(other._paddr), _size(other._size), + : Extensible(other), + _paddr(other._paddr), _size(other._size), _byteEnable(other._byteEnable), _requestorId(other._requestorId), _flags(other._flags), From 4dfc312d6df91129a4007e588678d076073d390d Mon Sep 17 00:00:00 2001 From: Yan Lee Date: Thu, 16 Feb 2023 20:56:19 -0800 Subject: [PATCH 200/492] base: extensible: add example codes of extension Change-Id: 
Iaab1f2998a3f621b86d63bed7274373ba433d71c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68017 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/base/extensible.hh | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/base/extensible.hh b/src/base/extensible.hh index eb79c71be3..e80103c577 100644 --- a/src/base/extensible.hh +++ b/src/base/extensible.hh @@ -27,6 +27,10 @@ /* @file * Extensible Object Base Class Declaration + * + * This class can be used to add an "extension" field to packet/request which + * will be passed along with the original packet/request pointer. This allows + * developers to extend packet/request without modifying the original class. */ #ifndef __BASE_EXTENSIBLE_HH__ @@ -69,6 +73,33 @@ class ExtensionBase * This is the extension for carrying additional information. * Each type of extension will have a unique extensionID. * This extensionID will assign to base class for comparsion. + * + * Example usage: + * + * class MyTarget : Extensible {}; + * + * class MyExtension : public Extension + * { + * public: + * MyExtension(); + * std::unique_ptr clone() const override; + * uint32_t getData(); + * + * private: + * uint32_t data_; + * }; + * + * std::unique_ptr mytarget(new MyTarget); + * std::shared_ptr myext(new MyExtension); + * mytarget->setExtension(myext); + * + * std::shared_ptr ext = mytarget->getExtension(); + * uint32_t data = ext->getData(); + * mytarget->removeExtension(); + * + * In the example above, MyTarget can carry an extension named MyExtension, + * which contains an additional data field. This could be applied to any + * debug information or any data field in any protocol.
*/ template class Extension : public ExtensionBase From c995d969568be1890717093a0722bf3cd77b8207 Mon Sep 17 00:00:00 2001 From: Ivan Turasov Date: Tue, 21 Feb 2023 14:46:02 +0100 Subject: [PATCH 201/492] arch-arm: Add missing header in regs/misc.hh Adding the header avoids "error: implicit instantiation of undefined template 'std::array..." error that halted the build process on macOS. Relevant discussion on Slack with Gabriel Bunsot Change-Id: I935d7045f4b2c01ecef7c663de7c1e9408eead57 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68217 Maintainer: Giacomo Travaglini Tested-by: kokoro Reviewed-by: Giacomo Travaglini --- src/arch/arm/regs/misc.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/arch/arm/regs/misc.hh b/src/arch/arm/regs/misc.hh index 999993b0de..bf25ea3144 100644 --- a/src/arch/arm/regs/misc.hh +++ b/src/arch/arm/regs/misc.hh @@ -41,6 +41,7 @@ #ifndef __ARCH_ARM_REGS_MISC_HH__ #define __ARCH_ARM_REGS_MISC_HH__ +#include #include #include From 3892ee029aa8814284cbdca352862ccc5ead0e49 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Sun, 19 Feb 2023 23:49:28 +0000 Subject: [PATCH 202/492] configs: Deprecate fs.py and se.py scripts Ideally, 'configs/common' should also be deprecated, but some tests still depend on this directory. 
Change-Id: I7c0cbf1f854e1dec9308b6802d6fb70c9af97fc0 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68157 Tested-by: kokoro Reviewed-by: Giacomo Travaglini Reviewed-by: Daniel Carvalho Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power --- configs/deprecated/example/fs.py | 444 +++++++++++++++++++++++++++++++ configs/deprecated/example/se.py | 293 ++++++++++++++++++++ configs/example/fs.py | 420 +---------------------------- configs/example/se.py | 267 +------------------ 4 files changed, 750 insertions(+), 674 deletions(-) create mode 100644 configs/deprecated/example/fs.py create mode 100644 configs/deprecated/example/se.py diff --git a/configs/deprecated/example/fs.py b/configs/deprecated/example/fs.py new file mode 100644 index 0000000000..59c35925fc --- /dev/null +++ b/configs/deprecated/example/fs.py @@ -0,0 +1,444 @@ +# Copyright (c) 2010-2013, 2016, 2019-2020 ARM Limited +# Copyright (c) 2020 Barkhausen Institut +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2012-2014 Mark D. Hill and David A. Wood +# Copyright (c) 2009-2011 Advanced Micro Devices, Inc. +# Copyright (c) 2006-2007 The Regents of The University of Michigan +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import argparse +import sys + +import m5 +from m5.defines import buildEnv +from m5.objects import * +from m5.util import addToPath, fatal, warn +from m5.util.fdthelper import * +from gem5.isas import ISA +from gem5.runtime import get_runtime_isa + +addToPath("../../") + +from ruby import Ruby + +from common.FSConfig import * +from common.SysPaths import * +from common.Benchmarks import * +from common import Simulation +from common import CacheConfig +from common import CpuConfig +from common import MemConfig +from common import ObjectList +from common.Caches import * +from common import Options + + +def cmd_line_template(): + if args.command_line and args.command_line_file: + print( + "Error: --command-line and --command-line-file are " + "mutually exclusive" + ) + sys.exit(1) + if args.command_line: + return args.command_line + if args.command_line_file: + return open(args.command_line_file).read().strip() + return None + + +def build_test_system(np): + cmdline = cmd_line_template() + isa = get_runtime_isa() + if isa == ISA.MIPS: + test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline) + elif isa == ISA.SPARC: + test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline) + elif isa == ISA.RISCV: + test_sys = makeBareMetalRiscvSystem( + test_mem_mode, bm[0], cmdline=cmdline + ) + elif isa == ISA.X86: + test_sys = makeLinuxX86System( + test_mem_mode, np, bm[0], args.ruby, cmdline=cmdline + ) + elif isa == ISA.ARM: + test_sys = makeArmSystem( + test_mem_mode, + args.machine_type, + np, + bm[0], + args.dtb_filename, + bare_metal=args.bare_metal, + cmdline=cmdline, + external_memory=args.external_memory_system, + ruby=args.ruby, + vio_9p=args.vio_9p, + bootloader=args.bootloader, + ) + if args.enable_context_switch_stats_dump: + test_sys.enable_context_switch_stats_dump = True + else: + fatal("Incapable of building %s full system!", isa.name) + + # Set the cache line size for the entire system + test_sys.cache_line_size = args.cacheline_size + + 
# Create a top-level voltage domain + test_sys.voltage_domain = VoltageDomain(voltage=args.sys_voltage) + + # Create a source clock for the system and set the clock period + test_sys.clk_domain = SrcClockDomain( + clock=args.sys_clock, voltage_domain=test_sys.voltage_domain + ) + + # Create a CPU voltage domain + test_sys.cpu_voltage_domain = VoltageDomain() + + # Create a source clock for the CPUs and set the clock period + test_sys.cpu_clk_domain = SrcClockDomain( + clock=args.cpu_clock, voltage_domain=test_sys.cpu_voltage_domain + ) + + if buildEnv["USE_RISCV_ISA"]: + test_sys.workload.bootloader = args.kernel + elif args.kernel is not None: + test_sys.workload.object_file = binary(args.kernel) + + if args.script is not None: + test_sys.readfile = args.script + + test_sys.init_param = args.init_param + + # For now, assign all the CPUs to the same clock domain + test_sys.cpu = [ + TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i) + for i in range(np) + ] + + if args.ruby: + bootmem = getattr(test_sys, "_bootmem", None) + Ruby.create_system( + args, True, test_sys, test_sys.iobus, test_sys._dma_ports, bootmem + ) + + # Create a seperate clock domain for Ruby + test_sys.ruby.clk_domain = SrcClockDomain( + clock=args.ruby_clock, voltage_domain=test_sys.voltage_domain + ) + + # Connect the ruby io port to the PIO bus, + # assuming that there is just one such port. 
+ test_sys.iobus.mem_side_ports = test_sys.ruby._io_port.in_ports + + for (i, cpu) in enumerate(test_sys.cpu): + # + # Tie the cpu ports to the correct ruby system ports + # + cpu.clk_domain = test_sys.cpu_clk_domain + cpu.createThreads() + cpu.createInterruptController() + + test_sys.ruby._cpu_ports[i].connectCpuPorts(cpu) + + else: + if args.caches or args.l2cache: + # By default the IOCache runs at the system clock + test_sys.iocache = IOCache(addr_ranges=test_sys.mem_ranges) + test_sys.iocache.cpu_side = test_sys.iobus.mem_side_ports + test_sys.iocache.mem_side = test_sys.membus.cpu_side_ports + elif not args.external_memory_system: + test_sys.iobridge = Bridge( + delay="50ns", ranges=test_sys.mem_ranges + ) + test_sys.iobridge.cpu_side_port = test_sys.iobus.mem_side_ports + test_sys.iobridge.mem_side_port = test_sys.membus.cpu_side_ports + + # Sanity check + if args.simpoint_profile: + if not ObjectList.is_noncaching_cpu(TestCPUClass): + fatal("SimPoint generation should be done with atomic cpu") + if np > 1: + fatal( + "SimPoint generation not supported with more than one CPUs" + ) + + for i in range(np): + if args.simpoint_profile: + test_sys.cpu[i].addSimPointProbe(args.simpoint_interval) + if args.checker: + test_sys.cpu[i].addCheckerCpu() + if not ObjectList.is_kvm_cpu(TestCPUClass): + if args.bp_type: + bpClass = ObjectList.bp_list.get(args.bp_type) + test_sys.cpu[i].branchPred = bpClass() + if args.indirect_bp_type: + IndirectBPClass = ObjectList.indirect_bp_list.get( + args.indirect_bp_type + ) + test_sys.cpu[ + i + ].branchPred.indirectBranchPred = IndirectBPClass() + test_sys.cpu[i].createThreads() + + # If elastic tracing is enabled when not restoring from checkpoint and + # when not fast forwarding using the atomic cpu, then check that the + # TestCPUClass is DerivO3CPU or inherits from DerivO3CPU. If the check + # passes then attach the elastic trace probe. 
+ # If restoring from checkpoint or fast forwarding, the code that does this for + # FutureCPUClass is in the Simulation module. If the check passes then the + # elastic trace probe is attached to the switch CPUs. + if ( + args.elastic_trace_en + and args.checkpoint_restore == None + and not args.fast_forward + ): + CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, args) + + CacheConfig.config_cache(args, test_sys) + + MemConfig.config_mem(args, test_sys) + + if ObjectList.is_kvm_cpu(TestCPUClass) or ObjectList.is_kvm_cpu( + FutureClass + ): + # Assign KVM CPUs to their own event queues / threads. This + # has to be done after creating caches and other child objects + # since these mustn't inherit the CPU event queue. + for i, cpu in enumerate(test_sys.cpu): + # Child objects usually inherit the parent's event + # queue. Override that and use the same event queue for + # all devices. + for obj in cpu.descendants(): + obj.eventq_index = 0 + cpu.eventq_index = i + 1 + test_sys.kvm_vm = KvmVM() + + return test_sys + + +def build_drive_system(np): + # driver system CPU is always simple, so is the memory + # Note this is an assignment of a class, not an instance. 
+ DriveCPUClass = AtomicSimpleCPU + drive_mem_mode = "atomic" + DriveMemClass = SimpleMemory + + cmdline = cmd_line_template() + if buildEnv["USE_MIPS_ISA"]: + drive_sys = makeLinuxMipsSystem(drive_mem_mode, bm[1], cmdline=cmdline) + elif buildEnv["USE_SPARC_ISA"]: + drive_sys = makeSparcSystem(drive_mem_mode, bm[1], cmdline=cmdline) + elif buildEnv["USE_X86_ISA"]: + drive_sys = makeLinuxX86System( + drive_mem_mode, np, bm[1], cmdline=cmdline + ) + elif buildEnv["USE_ARM_ISA"]: + drive_sys = makeArmSystem( + drive_mem_mode, + args.machine_type, + np, + bm[1], + args.dtb_filename, + cmdline=cmdline, + ) + + # Create a top-level voltage domain + drive_sys.voltage_domain = VoltageDomain(voltage=args.sys_voltage) + + # Create a source clock for the system and set the clock period + drive_sys.clk_domain = SrcClockDomain( + clock=args.sys_clock, voltage_domain=drive_sys.voltage_domain + ) + + # Create a CPU voltage domain + drive_sys.cpu_voltage_domain = VoltageDomain() + + # Create a source clock for the CPUs and set the clock period + drive_sys.cpu_clk_domain = SrcClockDomain( + clock=args.cpu_clock, voltage_domain=drive_sys.cpu_voltage_domain + ) + + drive_sys.cpu = DriveCPUClass( + clk_domain=drive_sys.cpu_clk_domain, cpu_id=0 + ) + drive_sys.cpu.createThreads() + drive_sys.cpu.createInterruptController() + drive_sys.cpu.connectBus(drive_sys.membus) + if args.kernel is not None: + drive_sys.workload.object_file = binary(args.kernel) + + if ObjectList.is_kvm_cpu(DriveCPUClass): + drive_sys.kvm_vm = KvmVM() + + drive_sys.iobridge = Bridge(delay="50ns", ranges=drive_sys.mem_ranges) + drive_sys.iobridge.cpu_side_port = drive_sys.iobus.mem_side_ports + drive_sys.iobridge.mem_side_port = drive_sys.membus.cpu_side_ports + + # Create the appropriate memory controllers and connect them to the + # memory bus + drive_sys.mem_ctrls = [ + DriveMemClass(range=r) for r in drive_sys.mem_ranges + ] + for i in range(len(drive_sys.mem_ctrls)): + drive_sys.mem_ctrls[i].port = 
drive_sys.membus.mem_side_ports + + drive_sys.init_param = args.init_param + + return drive_sys + + +warn( + "The fs.py script is deprecated. It will be removed in future releases of " + " gem5." +) + +# Add args +parser = argparse.ArgumentParser() +Options.addCommonOptions(parser) +Options.addFSOptions(parser) + +# Add the ruby specific and protocol specific args +if "--ruby" in sys.argv: + Ruby.define_options(parser) + +args = parser.parse_args() + +# system under test can be any CPU +(TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args) + +# Match the memories with the CPUs, based on the options for the test system +TestMemClass = Simulation.setMemClass(args) + +if args.benchmark: + try: + bm = Benchmarks[args.benchmark] + except KeyError: + print("Error benchmark %s has not been defined." % args.benchmark) + print("Valid benchmarks are: %s" % DefinedBenchmarks) + sys.exit(1) +else: + if args.dual: + bm = [ + SysConfig( + disks=args.disk_image, + rootdev=args.root_device, + mem=args.mem_size, + os_type=args.os_type, + ), + SysConfig( + disks=args.disk_image, + rootdev=args.root_device, + mem=args.mem_size, + os_type=args.os_type, + ), + ] + else: + bm = [ + SysConfig( + disks=args.disk_image, + rootdev=args.root_device, + mem=args.mem_size, + os_type=args.os_type, + ) + ] + +np = args.num_cpus + +test_sys = build_test_system(np) + +if len(bm) == 2: + drive_sys = build_drive_system(np) + root = makeDualRoot(True, test_sys, drive_sys, args.etherdump) +elif len(bm) == 1 and args.dist: + # This system is part of a dist-gem5 simulation + root = makeDistRoot( + test_sys, + args.dist_rank, + args.dist_size, + args.dist_server_name, + args.dist_server_port, + args.dist_sync_repeat, + args.dist_sync_start, + args.ethernet_linkspeed, + args.ethernet_linkdelay, + args.etherdump, + ) +elif len(bm) == 1: + root = Root(full_system=True, system=test_sys) +else: + print("Error I don't know how to create more than 2 systems.") + sys.exit(1) + +if 
ObjectList.is_kvm_cpu(TestCPUClass) or ObjectList.is_kvm_cpu(FutureClass): + # Required for running kvm on multiple host cores. + # Uses gem5's parallel event queue feature + # Note: The simulator is quite picky about this number! + root.sim_quantum = int(1e9) # 1 ms + +if args.timesync: + root.time_sync_enable = True + +if args.frame_capture: + VncServer.frame_capture = True + +if buildEnv["USE_ARM_ISA"] and not args.bare_metal and not args.dtb_filename: + if args.machine_type not in [ + "VExpress_GEM5", + "VExpress_GEM5_V1", + "VExpress_GEM5_V2", + "VExpress_GEM5_Foundation", + ]: + warn( + "Can only correctly generate a dtb for VExpress_GEM5_* " + "platforms, unless custom hardware models have been equipped " + "with generation functionality." + ) + + # Generate a Device Tree + for sysname in ("system", "testsys", "drivesys"): + if hasattr(root, sysname): + sys = getattr(root, sysname) + sys.workload.dtb_filename = os.path.join( + m5.options.outdir, "%s.dtb" % sysname + ) + sys.generateDtb(sys.workload.dtb_filename) + +if args.wait_gdb: + test_sys.workload.wait_for_remote_gdb = True + +Simulation.setWorkCountOptions(test_sys, args) +Simulation.run(args, root, test_sys, FutureClass) diff --git a/configs/deprecated/example/se.py b/configs/deprecated/example/se.py new file mode 100644 index 0000000000..4732839874 --- /dev/null +++ b/configs/deprecated/example/se.py @@ -0,0 +1,293 @@ +# Copyright (c) 2012-2013 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. 
You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2006-2008 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +# Simple test script +# +# "m5 test.py" + +import argparse +import sys +import os + +import m5 +from m5.defines import buildEnv +from m5.objects import * +from m5.params import NULL +from m5.util import addToPath, fatal, warn +from gem5.isas import ISA +from gem5.runtime import get_runtime_isa + +addToPath("../../") + +from ruby import Ruby + +from common import Options +from common import Simulation +from common import CacheConfig +from common import CpuConfig +from common import ObjectList +from common import MemConfig +from common.FileSystemConfig import config_filesystem +from common.Caches import * +from common.cpu2000 import * + + +def get_processes(args): + """Interprets provided args and returns a list of processes""" + + multiprocesses = [] + inputs = [] + outputs = [] + errouts = [] + pargs = [] + + workloads = args.cmd.split(";") + if args.input != "": + inputs = args.input.split(";") + if args.output != "": + outputs = args.output.split(";") + if args.errout != "": + errouts = args.errout.split(";") + if args.options != "": + pargs = args.options.split(";") + + idx = 0 + for wrkld in workloads: + process = Process(pid=100 + idx) + process.executable = wrkld + process.cwd = os.getcwd() + process.gid = os.getgid() + + if args.env: + with open(args.env, "r") as f: + process.env = [line.rstrip() for line in f] + + if len(pargs) > idx: + process.cmd = [wrkld] + pargs[idx].split() + else: + process.cmd = [wrkld] + + if len(inputs) > idx: + process.input = inputs[idx] + if len(outputs) > idx: + process.output = outputs[idx] + if len(errouts) > idx: + process.errout = errouts[idx] + + multiprocesses.append(process) + idx += 1 + + if args.smt: + assert args.cpu_type == "DerivO3CPU" + return multiprocesses, idx + else: + return multiprocesses, 1 + + +warn( + "The se.py script is deprecated. It will be removed in future releases of " + " gem5." 
+) + +parser = argparse.ArgumentParser() +Options.addCommonOptions(parser) +Options.addSEOptions(parser) + +if "--ruby" in sys.argv: + Ruby.define_options(parser) + +args = parser.parse_args() + +multiprocesses = [] +numThreads = 1 + +if args.bench: + apps = args.bench.split("-") + if len(apps) != args.num_cpus: + print("number of benchmarks not equal to set num_cpus!") + sys.exit(1) + + for app in apps: + try: + if get_runtime_isa() == ISA.ARM: + exec( + "workload = %s('arm_%s', 'linux', '%s')" + % (app, args.arm_iset, args.spec_input) + ) + else: + # TARGET_ISA has been removed, but this is missing a ], so it + # has incorrect syntax and wasn't being used anyway. + exec( + "workload = %s(buildEnv['TARGET_ISA', 'linux', '%s')" + % (app, args.spec_input) + ) + multiprocesses.append(workload.makeProcess()) + except: + print( + "Unable to find workload for %s: %s" + % (get_runtime_isa().name(), app), + file=sys.stderr, + ) + sys.exit(1) +elif args.cmd: + multiprocesses, numThreads = get_processes(args) +else: + print("No workload specified. 
Exiting!\n", file=sys.stderr) + sys.exit(1) + + +(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args) +CPUClass.numThreads = numThreads + +# Check -- do not allow SMT with multiple CPUs +if args.smt and args.num_cpus > 1: + fatal("You cannot use SMT with multiple CPUs!") + +np = args.num_cpus +mp0_path = multiprocesses[0].executable +system = System( + cpu=[CPUClass(cpu_id=i) for i in range(np)], + mem_mode=test_mem_mode, + mem_ranges=[AddrRange(args.mem_size)], + cache_line_size=args.cacheline_size, +) + +if numThreads > 1: + system.multi_thread = True + +# Create a top-level voltage domain +system.voltage_domain = VoltageDomain(voltage=args.sys_voltage) + +# Create a source clock for the system and set the clock period +system.clk_domain = SrcClockDomain( + clock=args.sys_clock, voltage_domain=system.voltage_domain +) + +# Create a CPU voltage domain +system.cpu_voltage_domain = VoltageDomain() + +# Create a separate clock domain for the CPUs +system.cpu_clk_domain = SrcClockDomain( + clock=args.cpu_clock, voltage_domain=system.cpu_voltage_domain +) + +# If elastic tracing is enabled, then configure the cpu and attach the elastic +# trace probe +if args.elastic_trace_en: + CpuConfig.config_etrace(CPUClass, system.cpu, args) + +# All cpus belong to a common cpu_clk_domain, therefore running at a common +# frequency. 
+for cpu in system.cpu: + cpu.clk_domain = system.cpu_clk_domain + +if ObjectList.is_kvm_cpu(CPUClass) or ObjectList.is_kvm_cpu(FutureClass): + if buildEnv["USE_X86_ISA"]: + system.kvm_vm = KvmVM() + system.m5ops_base = 0xFFFF0000 + for process in multiprocesses: + process.useArchPT = True + process.kvmInSE = True + else: + fatal("KvmCPU can only be used in SE mode with x86") + +# Sanity check +if args.simpoint_profile: + if not ObjectList.is_noncaching_cpu(CPUClass): + fatal("SimPoint/BPProbe should be done with an atomic cpu") + if np > 1: + fatal("SimPoint generation not supported with more than one CPUs") + +for i in range(np): + if args.smt: + system.cpu[i].workload = multiprocesses + elif len(multiprocesses) == 1: + system.cpu[i].workload = multiprocesses[0] + else: + system.cpu[i].workload = multiprocesses[i] + + if args.simpoint_profile: + system.cpu[i].addSimPointProbe(args.simpoint_interval) + + if args.checker: + system.cpu[i].addCheckerCpu() + + if args.bp_type: + bpClass = ObjectList.bp_list.get(args.bp_type) + system.cpu[i].branchPred = bpClass() + + if args.indirect_bp_type: + indirectBPClass = ObjectList.indirect_bp_list.get( + args.indirect_bp_type + ) + system.cpu[i].branchPred.indirectBranchPred = indirectBPClass() + + system.cpu[i].createThreads() + +if args.ruby: + Ruby.create_system(args, False, system) + assert args.num_cpus == len(system.ruby._cpu_ports) + + system.ruby.clk_domain = SrcClockDomain( + clock=args.ruby_clock, voltage_domain=system.voltage_domain + ) + for i in range(np): + ruby_port = system.ruby._cpu_ports[i] + + # Create the interrupt controller and connect its ports to Ruby + # Note that the interrupt controller is always present but only + # in x86 does it have message ports that need to be connected + system.cpu[i].createInterruptController() + + # Connect the cpu's cache ports to Ruby + ruby_port.connectCpuPorts(system.cpu[i]) +else: + MemClass = Simulation.setMemClass(args) + system.membus = SystemXBar() + 
system.system_port = system.membus.cpu_side_ports + CacheConfig.config_cache(args, system) + MemConfig.config_mem(args, system) + config_filesystem(system, args) + +system.workload = SEWorkload.init_compatible(mp0_path) + +if args.wait_gdb: + system.workload.wait_for_remote_gdb = True + +root = Root(full_system=False, system=system) +Simulation.run(args, root, system, FutureClass) diff --git a/configs/example/fs.py b/configs/example/fs.py index 0e31cfccac..30b4f19553 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -1,19 +1,4 @@ -# Copyright (c) 2010-2013, 2016, 2019-2020 ARM Limited -# Copyright (c) 2020 Barkhausen Institut -# All rights reserved. -# -# The license below extends only to copyright in the software and shall -# not be construed as granting a license to any other intellectual -# property including but not limited to intellectual property relating -# to a hardware implementation of the functionality of the software -# licensed hereunder. You may use the software subject to the license -# terms below provided that you ensure that this notice is replicated -# unmodified and in its entirety in all distributions of the software, -# modified or unmodified, in source code or in binary form. -# -# Copyright (c) 2012-2014 Mark D. Hill and David A. Wood -# Copyright (c) 2009-2011 Advanced Micro Devices, Inc. -# Copyright (c) 2006-2007 The Regents of The University of Michigan +# Copyright (c) 2023 The Regents of the University of California # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -39,401 +24,10 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-import argparse -import sys +from m5.util import fatal -import m5 -from m5.defines import buildEnv -from m5.objects import * -from m5.util import addToPath, fatal, warn -from m5.util.fdthelper import * -from gem5.isas import ISA -from gem5.runtime import get_runtime_isa - -addToPath("../") - -from ruby import Ruby - -from common.FSConfig import * -from common.SysPaths import * -from common.Benchmarks import * -from common import Simulation -from common import CacheConfig -from common import CpuConfig -from common import MemConfig -from common import ObjectList -from common.Caches import * -from common import Options - - -def cmd_line_template(): - if args.command_line and args.command_line_file: - print( - "Error: --command-line and --command-line-file are " - "mutually exclusive" - ) - sys.exit(1) - if args.command_line: - return args.command_line - if args.command_line_file: - return open(args.command_line_file).read().strip() - return None - - -def build_test_system(np): - cmdline = cmd_line_template() - isa = get_runtime_isa() - if isa == ISA.MIPS: - test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline) - elif isa == ISA.SPARC: - test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline) - elif isa == ISA.RISCV: - test_sys = makeBareMetalRiscvSystem( - test_mem_mode, bm[0], cmdline=cmdline - ) - elif isa == ISA.X86: - test_sys = makeLinuxX86System( - test_mem_mode, np, bm[0], args.ruby, cmdline=cmdline - ) - elif isa == ISA.ARM: - test_sys = makeArmSystem( - test_mem_mode, - args.machine_type, - np, - bm[0], - args.dtb_filename, - bare_metal=args.bare_metal, - cmdline=cmdline, - external_memory=args.external_memory_system, - ruby=args.ruby, - vio_9p=args.vio_9p, - bootloader=args.bootloader, - ) - if args.enable_context_switch_stats_dump: - test_sys.enable_context_switch_stats_dump = True - else: - fatal("Incapable of building %s full system!", isa.name) - - # Set the cache line size for the entire system - test_sys.cache_line_size = 
args.cacheline_size - - # Create a top-level voltage domain - test_sys.voltage_domain = VoltageDomain(voltage=args.sys_voltage) - - # Create a source clock for the system and set the clock period - test_sys.clk_domain = SrcClockDomain( - clock=args.sys_clock, voltage_domain=test_sys.voltage_domain - ) - - # Create a CPU voltage domain - test_sys.cpu_voltage_domain = VoltageDomain() - - # Create a source clock for the CPUs and set the clock period - test_sys.cpu_clk_domain = SrcClockDomain( - clock=args.cpu_clock, voltage_domain=test_sys.cpu_voltage_domain - ) - - if buildEnv["USE_RISCV_ISA"]: - test_sys.workload.bootloader = args.kernel - elif args.kernel is not None: - test_sys.workload.object_file = binary(args.kernel) - - if args.script is not None: - test_sys.readfile = args.script - - test_sys.init_param = args.init_param - - # For now, assign all the CPUs to the same clock domain - test_sys.cpu = [ - TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i) - for i in range(np) - ] - - if args.ruby: - bootmem = getattr(test_sys, "_bootmem", None) - Ruby.create_system( - args, True, test_sys, test_sys.iobus, test_sys._dma_ports, bootmem - ) - - # Create a seperate clock domain for Ruby - test_sys.ruby.clk_domain = SrcClockDomain( - clock=args.ruby_clock, voltage_domain=test_sys.voltage_domain - ) - - # Connect the ruby io port to the PIO bus, - # assuming that there is just one such port. 
- test_sys.iobus.mem_side_ports = test_sys.ruby._io_port.in_ports - - for (i, cpu) in enumerate(test_sys.cpu): - # - # Tie the cpu ports to the correct ruby system ports - # - cpu.clk_domain = test_sys.cpu_clk_domain - cpu.createThreads() - cpu.createInterruptController() - - test_sys.ruby._cpu_ports[i].connectCpuPorts(cpu) - - else: - if args.caches or args.l2cache: - # By default the IOCache runs at the system clock - test_sys.iocache = IOCache(addr_ranges=test_sys.mem_ranges) - test_sys.iocache.cpu_side = test_sys.iobus.mem_side_ports - test_sys.iocache.mem_side = test_sys.membus.cpu_side_ports - elif not args.external_memory_system: - test_sys.iobridge = Bridge( - delay="50ns", ranges=test_sys.mem_ranges - ) - test_sys.iobridge.cpu_side_port = test_sys.iobus.mem_side_ports - test_sys.iobridge.mem_side_port = test_sys.membus.cpu_side_ports - - # Sanity check - if args.simpoint_profile: - if not ObjectList.is_noncaching_cpu(TestCPUClass): - fatal("SimPoint generation should be done with atomic cpu") - if np > 1: - fatal( - "SimPoint generation not supported with more than one CPUs" - ) - - for i in range(np): - if args.simpoint_profile: - test_sys.cpu[i].addSimPointProbe(args.simpoint_interval) - if args.checker: - test_sys.cpu[i].addCheckerCpu() - if not ObjectList.is_kvm_cpu(TestCPUClass): - if args.bp_type: - bpClass = ObjectList.bp_list.get(args.bp_type) - test_sys.cpu[i].branchPred = bpClass() - if args.indirect_bp_type: - IndirectBPClass = ObjectList.indirect_bp_list.get( - args.indirect_bp_type - ) - test_sys.cpu[ - i - ].branchPred.indirectBranchPred = IndirectBPClass() - test_sys.cpu[i].createThreads() - - # If elastic tracing is enabled when not restoring from checkpoint and - # when not fast forwarding using the atomic cpu, then check that the - # TestCPUClass is DerivO3CPU or inherits from DerivO3CPU. If the check - # passes then attach the elastic trace probe. 
- # If restoring from checkpoint or fast forwarding, the code that does this for - # FutureCPUClass is in the Simulation module. If the check passes then the - # elastic trace probe is attached to the switch CPUs. - if ( - args.elastic_trace_en - and args.checkpoint_restore == None - and not args.fast_forward - ): - CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, args) - - CacheConfig.config_cache(args, test_sys) - - MemConfig.config_mem(args, test_sys) - - if ObjectList.is_kvm_cpu(TestCPUClass) or ObjectList.is_kvm_cpu( - FutureClass - ): - # Assign KVM CPUs to their own event queues / threads. This - # has to be done after creating caches and other child objects - # since these mustn't inherit the CPU event queue. - for i, cpu in enumerate(test_sys.cpu): - # Child objects usually inherit the parent's event - # queue. Override that and use the same event queue for - # all devices. - for obj in cpu.descendants(): - obj.eventq_index = 0 - cpu.eventq_index = i + 1 - test_sys.kvm_vm = KvmVM() - - return test_sys - - -def build_drive_system(np): - # driver system CPU is always simple, so is the memory - # Note this is an assignment of a class, not an instance. 
- DriveCPUClass = AtomicSimpleCPU - drive_mem_mode = "atomic" - DriveMemClass = SimpleMemory - - cmdline = cmd_line_template() - if buildEnv["USE_MIPS_ISA"]: - drive_sys = makeLinuxMipsSystem(drive_mem_mode, bm[1], cmdline=cmdline) - elif buildEnv["USE_SPARC_ISA"]: - drive_sys = makeSparcSystem(drive_mem_mode, bm[1], cmdline=cmdline) - elif buildEnv["USE_X86_ISA"]: - drive_sys = makeLinuxX86System( - drive_mem_mode, np, bm[1], cmdline=cmdline - ) - elif buildEnv["USE_ARM_ISA"]: - drive_sys = makeArmSystem( - drive_mem_mode, - args.machine_type, - np, - bm[1], - args.dtb_filename, - cmdline=cmdline, - ) - - # Create a top-level voltage domain - drive_sys.voltage_domain = VoltageDomain(voltage=args.sys_voltage) - - # Create a source clock for the system and set the clock period - drive_sys.clk_domain = SrcClockDomain( - clock=args.sys_clock, voltage_domain=drive_sys.voltage_domain - ) - - # Create a CPU voltage domain - drive_sys.cpu_voltage_domain = VoltageDomain() - - # Create a source clock for the CPUs and set the clock period - drive_sys.cpu_clk_domain = SrcClockDomain( - clock=args.cpu_clock, voltage_domain=drive_sys.cpu_voltage_domain - ) - - drive_sys.cpu = DriveCPUClass( - clk_domain=drive_sys.cpu_clk_domain, cpu_id=0 - ) - drive_sys.cpu.createThreads() - drive_sys.cpu.createInterruptController() - drive_sys.cpu.connectBus(drive_sys.membus) - if args.kernel is not None: - drive_sys.workload.object_file = binary(args.kernel) - - if ObjectList.is_kvm_cpu(DriveCPUClass): - drive_sys.kvm_vm = KvmVM() - - drive_sys.iobridge = Bridge(delay="50ns", ranges=drive_sys.mem_ranges) - drive_sys.iobridge.cpu_side_port = drive_sys.iobus.mem_side_ports - drive_sys.iobridge.mem_side_port = drive_sys.membus.cpu_side_ports - - # Create the appropriate memory controllers and connect them to the - # memory bus - drive_sys.mem_ctrls = [ - DriveMemClass(range=r) for r in drive_sys.mem_ranges - ] - for i in range(len(drive_sys.mem_ctrls)): - drive_sys.mem_ctrls[i].port = 
drive_sys.membus.mem_side_ports - - drive_sys.init_param = args.init_param - - return drive_sys - - -# Add args -parser = argparse.ArgumentParser() -Options.addCommonOptions(parser) -Options.addFSOptions(parser) - -# Add the ruby specific and protocol specific args -if "--ruby" in sys.argv: - Ruby.define_options(parser) - -args = parser.parse_args() - -# system under test can be any CPU -(TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args) - -# Match the memories with the CPUs, based on the options for the test system -TestMemClass = Simulation.setMemClass(args) - -if args.benchmark: - try: - bm = Benchmarks[args.benchmark] - except KeyError: - print("Error benchmark %s has not been defined." % args.benchmark) - print("Valid benchmarks are: %s" % DefinedBenchmarks) - sys.exit(1) -else: - if args.dual: - bm = [ - SysConfig( - disks=args.disk_image, - rootdev=args.root_device, - mem=args.mem_size, - os_type=args.os_type, - ), - SysConfig( - disks=args.disk_image, - rootdev=args.root_device, - mem=args.mem_size, - os_type=args.os_type, - ), - ] - else: - bm = [ - SysConfig( - disks=args.disk_image, - rootdev=args.root_device, - mem=args.mem_size, - os_type=args.os_type, - ) - ] - -np = args.num_cpus - -test_sys = build_test_system(np) - -if len(bm) == 2: - drive_sys = build_drive_system(np) - root = makeDualRoot(True, test_sys, drive_sys, args.etherdump) -elif len(bm) == 1 and args.dist: - # This system is part of a dist-gem5 simulation - root = makeDistRoot( - test_sys, - args.dist_rank, - args.dist_size, - args.dist_server_name, - args.dist_server_port, - args.dist_sync_repeat, - args.dist_sync_start, - args.ethernet_linkspeed, - args.ethernet_linkdelay, - args.etherdump, - ) -elif len(bm) == 1: - root = Root(full_system=True, system=test_sys) -else: - print("Error I don't know how to create more than 2 systems.") - sys.exit(1) - -if ObjectList.is_kvm_cpu(TestCPUClass) or ObjectList.is_kvm_cpu(FutureClass): - # Required for running kvm on 
multiple host cores. - # Uses gem5's parallel event queue feature - # Note: The simulator is quite picky about this number! - root.sim_quantum = int(1e9) # 1 ms - -if args.timesync: - root.time_sync_enable = True - -if args.frame_capture: - VncServer.frame_capture = True - -if buildEnv["USE_ARM_ISA"] and not args.bare_metal and not args.dtb_filename: - if args.machine_type not in [ - "VExpress_GEM5", - "VExpress_GEM5_V1", - "VExpress_GEM5_V2", - "VExpress_GEM5_Foundation", - ]: - warn( - "Can only correctly generate a dtb for VExpress_GEM5_* " - "platforms, unless custom hardware models have been equipped " - "with generation functionality." - ) - - # Generate a Device Tree - for sysname in ("system", "testsys", "drivesys"): - if hasattr(root, sysname): - sys = getattr(root, sysname) - sys.workload.dtb_filename = os.path.join( - m5.options.outdir, "%s.dtb" % sysname - ) - sys.generateDtb(sys.workload.dtb_filename) - -if args.wait_gdb: - test_sys.workload.wait_for_remote_gdb = True - -Simulation.setWorkCountOptions(test_sys, args) -Simulation.run(args, root, test_sys, FutureClass) +fatal( + "The 'configs/example/fs.py' script has been deprecated. It can be " + "found in 'configs/deprecated/example' if required. Its usage should be " + "avoided as it will be removed in future releases of gem5." +) diff --git a/configs/example/se.py b/configs/example/se.py index 2372cf0efe..c185f09e5a 100644 --- a/configs/example/se.py +++ b/configs/example/se.py @@ -1,16 +1,4 @@ -# Copyright (c) 2012-2013 ARM Limited -# All rights reserved. -# -# The license below extends only to copyright in the software and shall -# not be construed as granting a license to any other intellectual -# property including but not limited to intellectual property relating -# to a hardware implementation of the functionality of the software -# licensed hereunder. 
You may use the software subject to the license -# terms below provided that you ensure that this notice is replicated -# unmodified and in its entirety in all distributions of the software, -# modified or unmodified, in source code or in binary form. -# -# Copyright (c) 2006-2008 The Regents of The University of Michigan +# Copyright (c) 2023 The Regents of the University of California # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -36,253 +24,10 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# Simple test script -# -# "m5 test.py" +from m5.util import fatal -import argparse -import sys -import os - -import m5 -from m5.defines import buildEnv -from m5.objects import * -from m5.params import NULL -from m5.util import addToPath, fatal, warn -from gem5.isas import ISA -from gem5.runtime import get_runtime_isa - -addToPath("../") - -from ruby import Ruby - -from common import Options -from common import Simulation -from common import CacheConfig -from common import CpuConfig -from common import ObjectList -from common import MemConfig -from common.FileSystemConfig import config_filesystem -from common.Caches import * -from common.cpu2000 import * - - -def get_processes(args): - """Interprets provided args and returns a list of processes""" - - multiprocesses = [] - inputs = [] - outputs = [] - errouts = [] - pargs = [] - - workloads = args.cmd.split(";") - if args.input != "": - inputs = args.input.split(";") - if args.output != "": - outputs = args.output.split(";") - if args.errout != "": - errouts = args.errout.split(";") - if args.options != "": - pargs = args.options.split(";") - - idx = 0 - for wrkld in workloads: - process = Process(pid=100 + idx) - process.executable = wrkld - process.cwd = os.getcwd() - process.gid = os.getgid() - - if args.env: - with open(args.env, "r") as f: - process.env = [line.rstrip() for 
line in f] - - if len(pargs) > idx: - process.cmd = [wrkld] + pargs[idx].split() - else: - process.cmd = [wrkld] - - if len(inputs) > idx: - process.input = inputs[idx] - if len(outputs) > idx: - process.output = outputs[idx] - if len(errouts) > idx: - process.errout = errouts[idx] - - multiprocesses.append(process) - idx += 1 - - if args.smt: - assert args.cpu_type == "DerivO3CPU" - return multiprocesses, idx - else: - return multiprocesses, 1 - - -parser = argparse.ArgumentParser() -Options.addCommonOptions(parser) -Options.addSEOptions(parser) - -if "--ruby" in sys.argv: - Ruby.define_options(parser) - -args = parser.parse_args() - -multiprocesses = [] -numThreads = 1 - -if args.bench: - apps = args.bench.split("-") - if len(apps) != args.num_cpus: - print("number of benchmarks not equal to set num_cpus!") - sys.exit(1) - - for app in apps: - try: - if get_runtime_isa() == ISA.ARM: - exec( - "workload = %s('arm_%s', 'linux', '%s')" - % (app, args.arm_iset, args.spec_input) - ) - else: - # TARGET_ISA has been removed, but this is missing a ], so it - # has incorrect syntax and wasn't being used anyway. - exec( - "workload = %s(buildEnv['TARGET_ISA', 'linux', '%s')" - % (app, args.spec_input) - ) - multiprocesses.append(workload.makeProcess()) - except: - print( - "Unable to find workload for %s: %s" - % (get_runtime_isa().name(), app), - file=sys.stderr, - ) - sys.exit(1) -elif args.cmd: - multiprocesses, numThreads = get_processes(args) -else: - print("No workload specified. 
Exiting!\n", file=sys.stderr) - sys.exit(1) - - -(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args) -CPUClass.numThreads = numThreads - -# Check -- do not allow SMT with multiple CPUs -if args.smt and args.num_cpus > 1: - fatal("You cannot use SMT with multiple CPUs!") - -np = args.num_cpus -mp0_path = multiprocesses[0].executable -system = System( - cpu=[CPUClass(cpu_id=i) for i in range(np)], - mem_mode=test_mem_mode, - mem_ranges=[AddrRange(args.mem_size)], - cache_line_size=args.cacheline_size, +fatal( + "The 'configs/example/se.py' script has been deprecated. It can be " + "found in 'configs/deprecated/example' if required. Its usage should be " + "avoided as it will be removed in future releases of gem5." ) - -if numThreads > 1: - system.multi_thread = True - -# Create a top-level voltage domain -system.voltage_domain = VoltageDomain(voltage=args.sys_voltage) - -# Create a source clock for the system and set the clock period -system.clk_domain = SrcClockDomain( - clock=args.sys_clock, voltage_domain=system.voltage_domain -) - -# Create a CPU voltage domain -system.cpu_voltage_domain = VoltageDomain() - -# Create a separate clock domain for the CPUs -system.cpu_clk_domain = SrcClockDomain( - clock=args.cpu_clock, voltage_domain=system.cpu_voltage_domain -) - -# If elastic tracing is enabled, then configure the cpu and attach the elastic -# trace probe -if args.elastic_trace_en: - CpuConfig.config_etrace(CPUClass, system.cpu, args) - -# All cpus belong to a common cpu_clk_domain, therefore running at a common -# frequency. 
-for cpu in system.cpu: - cpu.clk_domain = system.cpu_clk_domain - -if ObjectList.is_kvm_cpu(CPUClass) or ObjectList.is_kvm_cpu(FutureClass): - if buildEnv["USE_X86_ISA"]: - system.kvm_vm = KvmVM() - system.m5ops_base = 0xFFFF0000 - for process in multiprocesses: - process.useArchPT = True - process.kvmInSE = True - else: - fatal("KvmCPU can only be used in SE mode with x86") - -# Sanity check -if args.simpoint_profile: - if not ObjectList.is_noncaching_cpu(CPUClass): - fatal("SimPoint/BPProbe should be done with an atomic cpu") - if np > 1: - fatal("SimPoint generation not supported with more than one CPUs") - -for i in range(np): - if args.smt: - system.cpu[i].workload = multiprocesses - elif len(multiprocesses) == 1: - system.cpu[i].workload = multiprocesses[0] - else: - system.cpu[i].workload = multiprocesses[i] - - if args.simpoint_profile: - system.cpu[i].addSimPointProbe(args.simpoint_interval) - - if args.checker: - system.cpu[i].addCheckerCpu() - - if args.bp_type: - bpClass = ObjectList.bp_list.get(args.bp_type) - system.cpu[i].branchPred = bpClass() - - if args.indirect_bp_type: - indirectBPClass = ObjectList.indirect_bp_list.get( - args.indirect_bp_type - ) - system.cpu[i].branchPred.indirectBranchPred = indirectBPClass() - - system.cpu[i].createThreads() - -if args.ruby: - Ruby.create_system(args, False, system) - assert args.num_cpus == len(system.ruby._cpu_ports) - - system.ruby.clk_domain = SrcClockDomain( - clock=args.ruby_clock, voltage_domain=system.voltage_domain - ) - for i in range(np): - ruby_port = system.ruby._cpu_ports[i] - - # Create the interrupt controller and connect its ports to Ruby - # Note that the interrupt controller is always present but only - # in x86 does it have message ports that need to be connected - system.cpu[i].createInterruptController() - - # Connect the cpu's cache ports to Ruby - ruby_port.connectCpuPorts(system.cpu[i]) -else: - MemClass = Simulation.setMemClass(args) - system.membus = SystemXBar() - 
system.system_port = system.membus.cpu_side_ports - CacheConfig.config_cache(args, system) - MemConfig.config_mem(args, system) - config_filesystem(system, args) - -system.workload = SEWorkload.init_compatible(mp0_path) - -if args.wait_gdb: - system.workload.wait_for_remote_gdb = True - -root = Root(full_system=False, system=system) -Simulation.run(args, root, system, FutureClass) From 4ee724e054f9cf19685bd81f6e74e019040013cc Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Fri, 6 Jan 2023 16:58:16 +0000 Subject: [PATCH 203/492] stdlib: Specialize the gem5-resources This commit specializes the Resource class into specific sub-types. The `Resource`, `CustomResource` and `CustomDiskImageResource` classes have been deprecated in favor of the `AbstractResource` subclasses. Custom Resources can be created via the resource specialization constructor. Resources can be obtained via the gem5-resource infrastructure with the `obtain_resource` function. Fully implemented: - DiskImageResource - BinaryResource - KernelResource - BootloaderResource - FileResource - DirectoryResource Partially implemented: - SimpointResource - CheckpointResource While the schema of the resource.json file has changed, efforts have been made to ensure backwards compatibility is maintained during this transition. Tests are included in this commit to verify this feature works as expected. 
**Note:** The Simpoint tests are disabled in this commit, to be reenabled when Simpoint resource specialization is fully incorporated here: https://gem5-review.googlesource.com/c/public/gem5/+/67339 Change-Id: I77277ecaffc7abc86db08526aacc0b606ef04fe8 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67175 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- .../components/boards/kernel_disk_workload.py | 36 +- .../components/boards/se_binary_workload.py | 22 +- src/python/gem5/resources/downloader.py | 12 +- src/python/gem5/resources/resource.py | 565 ++++++++++++++---- src/python/gem5/resources/workload.py | 4 +- .../test_gem5_library_examples.py | 74 +-- .../pyunit_resource_specialization.py | 196 ++++++ .../resources/pyunit_workload_checks.py | 26 +- .../refs/resource-specialization.json | 99 +++ 9 files changed, 837 insertions(+), 197 deletions(-) create mode 100644 tests/pyunit/stdlib/resources/pyunit_resource_specialization.py create mode 100644 tests/pyunit/stdlib/resources/refs/resource-specialization.json diff --git a/src/python/gem5/components/boards/kernel_disk_workload.py b/src/python/gem5/components/boards/kernel_disk_workload.py index 29d38baa7b..15e0cdf303 100644 --- a/src/python/gem5/components/boards/kernel_disk_workload.py +++ b/src/python/gem5/components/boards/kernel_disk_workload.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 The Regents of the University of California +# Copyright (c) 2021, 2023 The Regents of the University of California # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -27,7 +27,12 @@ from abc import abstractmethod from .abstract_board import AbstractBoard -from ...resources.resource import AbstractResource +from ...resources.resource import ( + DiskImageResource, + BootloaderResource, + CheckpointResource, + KernelResource, +) from typing import List, Optional, Union import os @@ -89,7 +94,7 @@ class KernelDiskWorkload: raise NotImplementedError @abstractmethod - def _add_disk_to_board(self, disk_image: AbstractResource) -> None: + def _add_disk_to_board(self, disk_image: DiskImageResource) -> None: """ Sets the configuration needed to add the disk image to the board. @@ -101,7 +106,7 @@ class KernelDiskWorkload: raise NotImplementedError def get_disk_root_partition( - cls, disk_image: AbstractResource + cls, disk_image: DiskImageResource ) -> Optional[str]: """ Obtains the root partition of a disk image by inspecting the resource's @@ -109,14 +114,11 @@ class KernelDiskWorkload: :returns: The disk image's root partition. """ - try: - return disk_image.get_metadata()["additional_metadata"][ - "root_partition" - ] - except KeyError: - return None + return disk_image.get_root_partition() - def get_default_kernel_root_val(self, disk_image: AbstractResource) -> str: + def get_default_kernel_root_val( + self, disk_image: DiskImageResource + ) -> str: """ Get the default kernel root value to be passed to the kernel. 
This is determined by the value implemented in the `get_disk_device()` @@ -134,14 +136,14 @@ class KernelDiskWorkload: def set_kernel_disk_workload( self, - kernel: AbstractResource, - disk_image: AbstractResource, - bootloader: Optional[AbstractResource] = None, + kernel: KernelResource, + disk_image: DiskImageResource, + bootloader: Optional[BootloaderResource] = None, readfile: Optional[str] = None, readfile_contents: Optional[str] = None, kernel_args: Optional[List[str]] = None, exit_on_work_items: bool = True, - checkpoint: Optional[Union[Path, AbstractResource]] = None, + checkpoint: Optional[Union[Path, CheckpointResource]] = None, ) -> None: """ This function allows the setting of a full-system run with a Kernel @@ -212,11 +214,11 @@ class KernelDiskWorkload: if checkpoint: if isinstance(checkpoint, Path): self._checkpoint = checkpoint - elif isinstance(checkpoint, AbstractResource): + elif isinstance(checkpoint, CheckpointResource): self._checkpoint = Path(checkpoint.get_local_path()) else: # The checkpoint_dir must be None, Path, Or AbstractResource. raise Exception( "Checkpoints must be passed as a Path or an " - "AbstractResource." + "CheckpointResource." ) diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py index 8ec112ee13..acedfaf9a9 100644 --- a/src/python/gem5/components/boards/se_binary_workload.py +++ b/src/python/gem5/components/boards/se_binary_workload.py @@ -25,7 +25,13 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from .abstract_board import AbstractBoard -from ...resources.resource import AbstractResource +from ...resources.resource import ( + FileResource, + AbstractResource, + BinaryResource, + CheckpointResource, + SimpointResource, +) from gem5.utils.simpoint import SimPoint from m5.objects import SEWorkload, Process @@ -51,13 +57,13 @@ class SEBinaryWorkload: def set_se_binary_workload( self, - binary: AbstractResource, + binary: BinaryResource, exit_on_work_items: bool = True, - stdin_file: Optional[AbstractResource] = None, + stdin_file: Optional[FileResource] = None, stdout_file: Optional[Path] = None, stderr_file: Optional[Path] = None, arguments: List[str] = [], - checkpoint: Optional[Union[Path, AbstractResource]] = None, + checkpoint: Optional[Union[Path, CheckpointResource]] = None, ) -> None: """Set up the system to run a specific binary. @@ -117,10 +123,10 @@ class SEBinaryWorkload: def set_se_simpoint_workload( self, - binary: AbstractResource, + binary: BinaryResource, arguments: List[str] = [], - simpoint: Union[AbstractResource, SimPoint] = None, - checkpoint: Optional[Union[Path, AbstractResource]] = None, + simpoint: Union[SimpointResource, SimPoint] = None, + checkpoint: Optional[Union[Path, CheckpointResource]] = None, ) -> None: """Set up the system to run a SimPoint workload. @@ -141,7 +147,7 @@ class SEBinaryWorkload: """ # convert input to SimPoint if necessary - if isinstance(simpoint, AbstractResource): + if isinstance(simpoint, SimpointResource): self._simpoint_object = SimPoint(simpoint) else: assert isinstance(simpoint, SimPoint) diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py index 1fda8d86b6..0b67ecdebd 100644 --- a/src/python/gem5/resources/downloader.py +++ b/src/python/gem5/resources/downloader.py @@ -323,7 +323,11 @@ def list_resources() -> List[str]: :returns: A list of resources by name. 
""" - return _get_resources(valid_types={"resource"}).keys() + from .resource import _get_resource_json_type_map + + return _get_resources( + valid_types=_get_resource_json_type_map.keys() + ).keys() def get_workload_json_obj(workload_name: str) -> Dict: @@ -356,7 +360,11 @@ def get_resources_json_obj(resource_name: str) -> Dict: :raises Exception: An exception is raised if the specified resources does not exist. """ - resource_map = _get_resources(valid_types={"resource"}) + from .resource import _get_resource_json_type_map + + resource_map = _get_resources( + valid_types=_get_resource_json_type_map.keys() + ) if resource_name not in resource_map: raise Exception( diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py index 1f7305def7..e4873d689d 100644 --- a/src/python/gem5/resources/resource.py +++ b/src/python/gem5/resources/resource.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 The Regents of the University of California +# Copyright (c) 2021-2023 The Regents of the University of California # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -27,43 +27,401 @@ from abc import ABCMeta import os from pathlib import Path +from m5.util import warn from .downloader import get_resource, get_resources_json_obj -from typing import Optional, Dict +from ..isas import ISA, get_isa_from_str + +from typing import Optional, Dict, Union, Type """ -A Resource object encapsulates a gem5 resource. Resources are items needed to -run a simulation, such as a disk image, kernel, or binary. The gem5 project -provides pre-built resources, with sources, at . +Resources are items needed to run a simulation, such as a disk image, kernel, +or binary. The gem5 project provides pre-built resources, with sources, at +. Here we provide the `AbstractResource` class and its +various implementations which are designed to encapsulate a resource for use +in the gem5 Standard Library. 
-The purpose of this encapsulation is two fold: +These classes may be constructed directly. E.g.: -1. It allows automatic retrieval of gem5 resources. E.g., specifying a resource - which is not local will initiate a download. -2. It provides a location where code may be added to record the resources used - within a simulation. At present this is a TODO work-item. +```python +binary = BinaryResource(local_path="/path/to/binary") +``` + +or obtained via the gem5-resources infrastructure with the `obtain_resource` +function: + +```python +binary = obtain_resource("resource name here") +``` """ class AbstractResource: + """ + An abstract class which all Resource classes inherit from. + """ __metaclass__ = ABCMeta - def __init__(self, local_path: str, metadata: Dict = {}): + def __init__( + self, + local_path: str, + documentation: Optional[str] = None, + source: Optional[str] = None, + ): + """ + :param local_path: The path on the host system where this resource is + located + :param documentation: Documentation describing this resource. Not a + required parameter. By default is None. + :param source: The source (as in "source code") for this resource. This + string should navigate users to where the source for this resource + may be found. Not a required parameter. By default is None. + """ + + if not os.path.exists(local_path): + raise Exception( + f"Local path specified for resource, '{local_path}', does not " + "exist." + ) + self._local_path = local_path - self._metadata = metadata + self._documentation = documentation + self._source = source def get_local_path(self) -> str: + """Returns the local path of the resource.""" return self._local_path - def get_metadata(self) -> Dict: + def get_documentation(self) -> Optional[str]: + """Returns documentation associated with this resource.""" + return self._documentation + + def get_source(self) -> Optional[str]: + """Returns information as to where the source for this resource may be + found. 
""" - Returns the raw data from this resource, as seen in the - `resources.json` file. A user may specify the metadata of a local - resource. - """ - return self._metadata + return self._source + + +class FileResource(AbstractResource): + """A resource consisting of a single file.""" + + def __init__( + self, + local_path: str, + documentation: Optional[str] = None, + source: Optional[str] = None, + **kwargs, + ): + if not os.path.isfile(local_path): + raise Exception( + f"FileResource path specified, '{local_path}', is not a file." + ) + + super().__init__( + local_path=local_path, + documentation=documentation, + source=source, + ) + + +class DirectoryResource(AbstractResource): + """A resource consisting of a directory.""" + + def __init__( + self, + local_path: str, + documentation: Optional[str] = None, + source: Optional[str] = None, + **kwargs, + ): + + if not os.path.isdir(local_path): + raise Exception( + f"DirectoryResource path specified, {local_path}, is not a " + "directory." 
+ ) + + super().__init__( + local_path=local_path, + documentation=documentation, + source=source, + ) + + +class DiskImageResource(FileResource): + """A Disk Image resource.""" + + def __init__( + self, + local_path: str, + documentation: Optional[str] = None, + source: Optional[str] = None, + root_partition: Optional[str] = None, + **kwargs, + ): + super().__init__( + local_path=local_path, + documentation=documentation, + source=source, + ) + self._root_partition = root_partition + + def get_root_partition(self) -> Optional[str]: + """Returns, if applicable, the Root Partition of the disk image.""" + return self._root_partition + + +class BinaryResource(FileResource): + """A binary resource.""" + + def __init__( + self, + local_path: str, + documentation: Optional[str] = None, + source: Optional[str] = None, + architecture: Optional[Union[ISA, str]] = None, + **kwargs, + ): + super().__init__( + local_path=local_path, + documentation=documentation, + source=source, + ) + + self._architecture = None + if architecture: + if isinstance(architecture, str): + self._architecture = get_isa_from_str(architecture) + elif isinstance(architecture, ISA): + self._architecture = architecture + + def get_architecture(self) -> Optional[ISA]: + """Returns the ISA this binary is compiled to.""" + return self._architecture + + +class BootloaderResource(BinaryResource): + """A bootloader resource.""" + + def __init__( + self, + local_path: str, + documentation: Optional[str] = None, + source: Optional[str] = None, + architecture: Optional[Union[ISA, str]] = None, + **kwargs, + ): + super().__init__( + local_path=local_path, + documentation=documentation, + architecture=architecture, + source=source, + ) + + +class GitResource(DirectoryResource): + """A git resource.""" + + def __init__( + self, + local_path: str, + documentation: Optional[str] = None, + source: Optional[str] = None, + **kwargs, + ): + super().__init__( + local_path=local_path, + documentation=documentation, + 
source=source, + ) + + +class KernelResource(BinaryResource): + """A kernel resource.""" + + def __init__( + self, + local_path: str, + documentation: Optional[str] = None, + source: Optional[str] = None, + architecture: Optional[Union[ISA, str]] = None, + **kwargs, + ): + super().__init__( + local_path=local_path, + documentation=documentation, + source=source, + architecture=architecture, + ) + + +class CheckpointResource(DirectoryResource): + """A checkpoint resource. The following directory structure is expected: + + : + - board.physmem.store0.pmem + - m5.cpt + """ + + def __init__( + self, + local_path: str, + documentation: Optional[str] = None, + source: Optional[str] = None, + **kwargs, + ): + super().__init__( + local_path=local_path, + documentation=documentation, + source=source, + ) + + +class SimpointResource(DirectoryResource): + """A simpoint resource.""" + + def __init__( + self, + local_path: str, + documentation: Optional[str] = None, + source: Optional[str] = None, + **kwargs, + ): + super().__init__( + local_path=local_path, + documentation=documentation, + source=source, + ) + + +def obtain_resource( + resource_name: str, + resource_directory: Optional[str] = None, + download_md5_mismatch: bool = True, +) -> AbstractResource: + """ + This function primarily serves as a factory for resources. It will return + the correct `AbstractResource` implementation based on the resource + requested, by referencing the "resource.json" file (by default, that hosted + at https://resources.gem5.org/resources.json). In addition to this, this + function will download the resource if not detected in the + `resource_directory`. + + :param resource_name: The name of the gem5 resource as it appears under the + "name" field in the `resource.json` file. + :param resource_directory: The location of the directory in which the + resource is to be stored. If this parameter is not set, it will set to + the environment variable `GEM5_RESOURCE_DIR`. 
If the environment is not + set it will default to `~/.cache/gem5` if available, otherwise the CWD. + :param download_md5_mismatch: If the resource is present, but does not + have the correct md5 value, the resource will be deleted and + re-downloaded if this value is True. Otherwise an exception will be + thrown. True by default. + """ + + # If the `resource_directory` parameter is not set via this function, we + # check the "GEM5_RESOURCE_DIR" environment variable. If this too is not + # set we call `_get_default_resource_dir()` to determine where the + # resource directory is, or should be, located. + if resource_directory == None: + resource_directory = os.getenv( + "GEM5_RESOURCE_DIR", _get_default_resource_dir() + ) + + # Small checks here to ensure the resource directory is valid. + if os.path.exists(resource_directory): + if not os.path.isdir(resource_directory): + raise Exception( + "gem5 resource directory, " + "'{}', exists but is not a directory".format( + resource_directory + ) + ) + else: + # `exist_ok=True` here as, occasionally, if multiple instances of + # gem5 are started simultaneously, a race condition can exist to + # create the resource directory. Without `exist_ok=True`, threads + # which lose this race will throw a `FileExistsError` exception. + # `exist_ok=True` ensures no exception is thrown. + os.makedirs(resource_directory, exist_ok=True) + + # This is the path to which the resource is to be stored. + to_path = os.path.join(resource_directory, resource_name) + + # Download the resource if it does not already exist. + get_resource( + resource_name=resource_name, + to_path=os.path.join(resource_directory, resource_name), + download_md5_mismatch=download_md5_mismatch, + ) + + # Obtain the JSON resource entry for this resource + resource_json = get_resources_json_obj(resource_name) + + # Obtain the type from the JSON. From this we will determine what subclass + # of `AbstractResource` we are to create and return.
+ resources_type = resource_json["type"] + + if resources_type == "resource": + # This is a stop-gap measure to ensure this works with older versions of + # the "resource.json" file. These should be replaced with their + # respective specializations ASAP and this case removed. + if ( + "additional_metadata" in resource_json + and "root_partition" in resource_json["additional_metadata"] + ): + # In this case we should return a DiskImageResource. + root_partition = resource_json["additional_metadata"][ + "root_partition" + ] + return DiskImageResource( + local_path=to_path, root_partition=root_partition + ) + return CustomResource(local_path=to_path) + + assert resources_type in _get_resource_json_type_map + resource_class = _get_resource_json_type_map[resources_type] + + # Once we know what AbstractResource subclass we are using, we create it. + # The fields in the JSON object are assumed to map like-for-like to the + # subclass constructor, so we can pass the resource_json map directly. + return resource_class(local_path=to_path, **resource_json) + + +def _get_default_resource_dir() -> str: + """ + Obtain the default gem5 resources directory on the host system. This + function will iterate through sensible targets until it finds one that + works on the host system. + + :returns: The default gem5 resources directory. + """ + test_list = [ + # First try `~/.cache/gem5`. + os.path.join(Path.home(), ".cache", "gem5"), + # Last resort, just put things in the cwd. + os.path.join(Path.cwd(), "resources"), + ] + + for path in test_list: + if os.path.exists(path): # If the path already exists... + if os.path.isdir(path): # Check to see the path is a directory. + return path # If so, the path is valid and can be used. + else: # If the path does not exist, try to create it. + try: + os.makedirs(path, exist_ok=False) + return path + except OSError: + continue # If the path cannot be created, then try another.
+ + raise Exception("Cannot find a valid location to download resources") + + +# The following classes exist to preserve backwards functionality between the +# API for obtaining resources in v21.1.0 and prior. class CustomResource(AbstractResource): @@ -71,134 +429,101 @@ class CustomResource(AbstractResource): A custom gem5 resource. This can be used to encapsulate a resource provided by a gem5 user as opposed to one available within the gem5 resources repository. + + **Warning**: This class is deprecated and will be removed in future + releases of gem5. Please use the correct `AbstractResource` subclass + instead. """ def __init__(self, local_path: str, metadata: Dict = {}): """ :param local_path: The path of the resource on the host system. - :param metadata: Add metadata for the custom resource. + :param metadata: Add metadata for the custom resource. **Warning:** + As of v22.1.1, this parameter is not used. """ - super().__init__(local_path=local_path, metadata=metadata) + warn( + "The `CustomResource` class is deprecated. Please use an " + "`AbstractResource` subclass instead." + ) + if bool(metadata): # Empty dicts cast to False + warn( + "the `metadata` parameter was set via the `CustomResource` " + "constructor. This parameter is not used." + ) + super().__init__(local_path=local_path) -class CustomDiskImageResource(CustomResource): +class CustomDiskImageResource(DiskImageResource): """ A custom disk image gem5 resource. It can be used to specify a custom, local disk image. + + **Warning**: This class is deprecated and will be removed in future + releases of gem5. Please use the `DiskImageResource` class instead. This + class is merely a wrapper for it. """ def __init__( self, local_path: str, - disk_root_partition: Optional[str] = None, + root_partition: Optional[str] = None, metadata: Dict = {}, ): """ :param local_path: The path of the disk image on the host system. - :param disk_root_partition: The root disk partition to use. 
- :param metadata: Metadata for the resource. + :param root_partition: The root disk partition to use. + :param metadata: Metadata for the resource. **Warning:** As of " + "v22.1.1, this parameter is not used. """ - - # Behind the scenes, we set the the root partition via the metadata. - # For a traditional, non-custom, resource it is the metadata that is - # used to specify the disk image partition root. Therefore, when the - # root disk partition specified during the construction, we apply it as - # metadata. - if disk_root_partition: - disk_root_partition_dict = { - "additional_metadata": {"root_partition": disk_root_partition} - } - metadata.update(disk_root_partition_dict) - - super().__init__(local_path=local_path, metadata=metadata) - - -class Resource(AbstractResource): - """ - An official gem5 resources as hosted within our gem5 resources repository - (). - - A user need only specify the name of the resource during construction. The - resource will be downloaded if needed. A list of available resources can - be obtained via `downloader.list_resources()`. - """ - - def __init__( - self, - resource_name: str, - resource_directory: Optional[str] = None, - download_md5_mismatch: bool = True, - ): - """ - :param resource_name: The name of the gem5 resource. - :param resource_directory: The location of the directory in which the - resource is to be stored. If this parameter is not set, it will set to - the environment variable `GEM5_RESOURCE_DIR`. If the environment is not - set it will default to `~/.cache/gem5` if available, otherwise the CWD. - :param download_md5_mismatch: If the resource is present, but does not - have the correct md5 value, the resoruce will be deleted and - re-downloaded if this value is True. Otherwise an exception will be - thrown. True by default. 
- """ - - if resource_directory == None: - resource_directory = os.getenv( - "GEM5_RESOURCE_DIR", self._get_default_resource_dir() + warn( + "The `CustomDiskImageResource` class is deprecated. Please use " + "`DiskImageResource` instead." + ) + if bool(metadata): # Empty dicts cast to False + warn( + "the `metadata` parameter was set via the " + "`CustomDiskImageResource` constructor. This parameter is not " + "used." ) + super().__init__(local_path=local_path, root_partition=root_partition) - if os.path.exists(resource_directory): - if not os.path.isdir(resource_directory): - raise Exception( - "gem5 resource directory, " - "'{}', exists but is not a directory".format( - resource_directory - ) - ) - else: - # `exist_ok=True` here as, occasionally, if multiple instance of - # gem5 are started simultaneously, a race condition can exist to - # create the resource directory. Without `exit_ok=True`, threads - # which lose this race will thrown a `FileExistsError` exception. - # `exit_ok=True` ensures no exception is thrown. - os.makedirs(resource_directory, exist_ok=True) - to_path = os.path.join(resource_directory, resource_name) +def Resource( + resource_name: str, + resource_directory: Optional[str] = None, + download_md5_mismatch: bool = True, +) -> AbstractResource: + """ + This function was created to maintain backwards compatibility for v21.1.0 + and prior releases of gem5 where `Resource` was a class. - super().__init__( - local_path=to_path, metadata=get_resources_json_obj(resource_name) - ) - get_resource( - resource_name=resource_name, - to_path=to_path, - download_md5_mismatch=download_md5_mismatch, - ) + In the interests of gem5-resource specialization, the `Resource` class + has been dropped. Instead users are advised to use the `obtain_resource` + function which will return the correct `AbstractResource` implementation. + This function (disguised as a class) wraps this function.
+ """ - def _get_default_resource_dir(cls) -> str: - """ - Obtain the default gem5 resources directory on the host system. This - function will iterate through sensible targets until it finds one that - works on the host system. + warn( + "`Resource` has been deprecated. Please use the `obtain_resource` " + "function instead." + ) - :returns: The default gem5 resources directory. - """ - test_list = [ - # First try `~/.cache/gem5`. - os.path.join(Path.home(), ".cache", "gem5"), - # Last resort, just put things in the cwd. - os.path.join(Path.cwd(), "resources"), - ] + return obtain_resource( + resource_name=resource_name, + resource_directory=resource_directory, + download_md5_mismatch=download_md5_mismatch, + ) - for path in test_list: - if os.path.exists(path): # If the path already exists... - if os.path.isdir( - path - ): # Check to see the path is a directory. - return path # If so, the path is valid and can be used. - else: # If the path does not exist, try to create it. - try: - os.makedirs(path, exist_ok=False) - return path - except OSError: - continue # If the path cannot be created, then try another. - raise Exception("Cannot find a valid location to download resources") +_get_resource_json_type_map = { + "disk-image": DiskImageResource, + "binary": BinaryResource, + "kernel": KernelResource, + "checkpoint": CheckpointResource, + "git": GitResource, + "bootloader": BootloaderResource, + "file": FileResource, + "directory": DirectoryResource, + "simpoint": SimpointResource, + "resource": Resource, +} diff --git a/src/python/gem5/resources/workload.py b/src/python/gem5/resources/workload.py index 2ae89655e8..e0a19d0792 100644 --- a/src/python/gem5/resources/workload.py +++ b/src/python/gem5/resources/workload.py @@ -25,7 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from .downloader import get_workload_json_obj -from .resource import Resource +from .resource import obtain_resource from typing import Dict, Any, Optional @@ -209,7 +209,7 @@ class Workload(AbstractWorkload): assert isinstance(key, str) value = workload_json["resources"][key] assert isinstance(value, str) - params[key] = Resource( + params[key] = obtain_resource( value, resource_directory=resource_directory ) diff --git a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py index 9b5c2c67ff..514894f8d2 100644 --- a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py +++ b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py @@ -94,44 +94,44 @@ gem5_verify_config( length=constants.quick_tag, ) -gem5_verify_config( - name="test-simpoints-se-checkpoint", - fixtures=(), - verifiers=(), - config=joinpath( - config.base_dir, - "configs", - "example", - "gem5_library", - "checkpoints", - "simpoints-se-checkpoint.py", - ), - config_args=[ - "--checkpoint-path", - joinpath(resource_path, "se_checkpoint_folder-save"), - ], - valid_isas=(constants.all_compiled_tag,), - valid_hosts=constants.supported_hosts, - length=constants.quick_tag, -) +# gem5_verify_config( +# name="test-simpoints-se-checkpoint", +# fixtures=(), +# verifiers=(), +# config=joinpath( +# config.base_dir, +# "configs", +# "example", +# "gem5_library", +# "checkpoints", +# "simpoints-se-checkpoint.py", +# ), +# config_args=[ +# "--checkpoint-path", +# joinpath(resource_path, "se_checkpoint_folder-save"), +# ], +# valid_isas=(constants.all_compiled_tag,), +# valid_hosts=constants.supported_hosts, +# length=constants.quick_tag, +# ) -gem5_verify_config( - name="test-simpoints-se-restore", - fixtures=(), - verifiers=(), - config=joinpath( - config.base_dir, - "configs", - "example", - "gem5_library", - "checkpoints", - "simpoints-se-restore.py", - ), - config_args=[], - 
valid_isas=(constants.all_compiled_tag,), - valid_hosts=constants.supported_hosts, - length=constants.quick_tag, -) +# gem5_verify_config( +# name="test-simpoints-se-restore", +# fixtures=(), +# verifiers=(), +# config=joinpath( +# config.base_dir, +# "configs", +# "example", +# "gem5_library", +# "checkpoints", +# "simpoints-se-restore.py", +# ), +# config_args=[], +# valid_isas=(constants.all_compiled_tag,), +# valid_hosts=constants.supported_hosts, +# length=constants.quick_tag, +# ) if os.access("/dev/kvm", mode=os.R_OK | os.W_OK): # The x86-ubuntu-run uses KVM cores, this test will therefore only be run diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py new file mode 100644 index 0000000000..e0a8dddd07 --- /dev/null +++ b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py @@ -0,0 +1,196 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import unittest + +from gem5.resources.resource import * +from gem5.isas import ISA + + +class ResourceSpecializationSuite(unittest.TestCase): + """This suite tests that `gem5.resource.resource` casts to the correct + `AbstractResource` specialization when using the `obtain_resource` + function. + """ + + @classmethod + def setUpClass(cls): + """Prior to running the suite we set the resource directory to + "ref/resource-specialization.json" + """ + os.environ["GEM5_RESOURCE_JSON"] = os.path.join( + os.path.realpath(os.path.dirname(__file__)), + "refs", + "resource-specialization.json", + ) + + @classmethod + def tearDownClass(cls) -> None: + """After running the suite we unset the gem5-resource JSON file, as to + not interfere with others tests. + """ + del os.environ["GEM5_RESOURCE_JSON"] + + def get_resource_dir(cls) -> str: + """To ensure the resources are cached to the same directory as all + other tests, this function returns the location of the testing + directories "resources" directory. 
+ """ + return os.path.join( + os.path.realpath(os.path.dirname(__file__)), + os.pardir, + os.pardir, + os.pardir, + "gem5", + "resources", + ) + + def test_binary_resource(self) -> None: + """Tests the loading of of a BinaryResource""" + resource = obtain_resource( + resource_name="binary-example", + resource_directory=self.get_resource_dir(), + ) + + self.assertIsInstance(resource, BinaryResource) + + self.assertEquals( + "binary-example documentation.", resource.get_documentation() + ) + self.assertEquals("src/simple", resource.get_source()) + self.assertEquals(ISA.ARM, resource.get_architecture()) + + def test_kernel_resource(self) -> None: + """Tests the loading of a KernelResource.""" + resource = obtain_resource( + resource_name="kernel-example", + resource_directory=self.get_resource_dir(), + ) + + self.assertIsInstance(resource, KernelResource) + + self.assertEquals( + "kernel-example documentation.", resource.get_documentation() + ) + self.assertEquals("src/linux-kernel", resource.get_source()) + self.assertEquals(ISA.RISCV, resource.get_architecture()) + + def test_bootloader_resource(self) -> None: + """Tests the loading of a BootloaderResource.""" + resource = obtain_resource( + resource_name="bootloader-example", + resource_directory=self.get_resource_dir(), + ) + + self.assertIsInstance(resource, BootloaderResource) + + self.assertEquals( + "bootloader documentation.", resource.get_documentation() + ) + self.assertIsNone(resource.get_source()) + self.assertIsNone(resource.get_architecture()) + + def test_disk_image_resource(self) -> None: + """Tests the loading of a DiskImageResource.""" + resource = obtain_resource( + resource_name="disk-image-example", + resource_directory=self.get_resource_dir(), + ) + + self.assertIsInstance(resource, DiskImageResource) + + self.assertEquals( + "disk-image documentation.", resource.get_documentation() + ) + self.assertEquals("src/x86-ubuntu", resource.get_source()) + self.assertEquals("1", 
resource.get_root_partition()) + + def test_checkpoint_resource(self) -> None: + """Tests the loading of a CheckpointResource.""" + resource = obtain_resource( + resource_name="checkpoint-example", + resource_directory=self.get_resource_dir(), + ) + + self.assertIsInstance(resource, CheckpointResource) + + self.assertEquals( + "checkpoint-example documentation.", resource.get_documentation() + ) + self.assertIsNone(resource.get_source()) + + def test_git_resource(self) -> None: + """Tests the loading of a GitResource.""" + resource = obtain_resource( + resource_name="git-example", + resource_directory=self.get_resource_dir(), + ) + + self.assertIsInstance(resource, GitResource) + + self.assertIsNone(resource.get_documentation()) + self.assertIsNone(resource.get_source()) + + def test_simpoint_resource(self) -> None: + """Tests the loading of a Simpoint resource.""" + resource = obtain_resource( + resource_name="simpoint-example", + resource_directory=self.get_resource_dir(), + ) + + self.assertIsInstance(resource, SimpointResource) + + self.assertEquals( + "simpoint documentation.", resource.get_documentation() + ) + self.assertIsNone(resource.get_source()) + + def test_file_resource(self) -> None: + """Tests the loading of a FileResource.""" + resource = obtain_resource( + resource_name="file-example", + resource_directory=self.get_resource_dir(), + ) + + self.assertIsInstance(resource, FileResource) + + self.assertIsNone(resource.get_documentation()) + self.assertIsNone(resource.get_source()) + + def test_directory_resource(self) -> None: + """Tests the loading of a DirectoryResource.""" + resource = obtain_resource( + resource_name="directory-example", + resource_directory=self.get_resource_dir(), + ) + + self.assertIsInstance(resource, DirectoryResource) + + self.assertEquals( + "directory-example documentation.", resource.get_documentation() + ) + self.assertIsNone(resource.get_source()) diff --git a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py 
b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py index 9620289446..fab0bbfbf1 100644 --- a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py +++ b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py @@ -29,7 +29,11 @@ import tempfile import os from gem5.resources.workload import Workload, CustomWorkload -from gem5.resources.resource import Resource +from gem5.resources.resource import ( + BinaryResource, + DiskImageResource, + obtain_resource, +) from gem5.resources.downloader import _resources_json_version_required from typing import Dict @@ -50,7 +54,7 @@ class CustomWorkloadTestSuite(unittest.TestCase): "previous-versions" : {}, "resources": [ { - "type" : "resource", + "type" : "binary", "name" : "x86-hello64-static", "documentation" : "A 'Hello World!' binary.", "architecture" : "X86", @@ -73,7 +77,7 @@ class CustomWorkloadTestSuite(unittest.TestCase): cls.custom_workload = CustomWorkload( function="set_se_binary_workload", parameters={ - "binary": Resource("x86-hello64-static"), + "binary": obtain_resource("x86-hello64-static"), "arguments": ["hello", 6], }, ) @@ -100,7 +104,7 @@ class CustomWorkloadTestSuite(unittest.TestCase): self.assertEquals(2, len(parameters)) self.assertTrue("binary" in parameters) - self.assertTrue(isinstance(parameters["binary"], Resource)) + self.assertTrue(isinstance(parameters["binary"], BinaryResource)) self.assertTrue("arguments" in parameters) self.assertTrue(isinstance(parameters["arguments"], list)) @@ -156,7 +160,7 @@ class WorkloadTestSuite(unittest.TestCase): "previous-versions" : {}, "resources": [ { - "type" : "resource", + "type" : "kernel", "name" : "x86-linux-kernel-5.2.3", "documentation" : "The linux kernel (v5.2.3), compiled to X86.", "architecture" : "X86", @@ -166,7 +170,7 @@ class WorkloadTestSuite(unittest.TestCase): "source" : "src/linux-kernel" }, { - "type" : "resource", + "type" : "disk-image", "name" : "x86-ubuntu-18.04-img", "documentation" : "A disk image containing Ubuntu 18.04 for 
x86..", "architecture" : "X86", @@ -174,9 +178,7 @@ class WorkloadTestSuite(unittest.TestCase): "md5sum" : "90e363abf0ddf22eefa2c7c5c9391c49", "url" : "{url_base}/images/x86/ubuntu-18-04/x86-ubuntu.img.gz", "source" : "src/x86-ubuntu", - "additional_metadata" : { - "root_partition": "1" - } + "root_partition": "1" }, { "type" : "workload", @@ -226,10 +228,12 @@ class WorkloadTestSuite(unittest.TestCase): self.assertEqual(3, len(parameters)) self.assertTrue("kernel" in parameters) - self.assertTrue(isinstance(parameters["kernel"], Resource)) + self.assertTrue(isinstance(parameters["kernel"], BinaryResource)) self.assertTrue("disk_image" in parameters) - self.assertTrue(isinstance(parameters["disk_image"], Resource)) + self.assertTrue( + isinstance(parameters["disk_image"], DiskImageResource) + ) self.assertTrue("readfile_contents" in parameters) self.assertTrue( diff --git a/tests/pyunit/stdlib/resources/refs/resource-specialization.json b/tests/pyunit/stdlib/resources/refs/resource-specialization.json new file mode 100644 index 0000000000..77ffc10705 --- /dev/null +++ b/tests/pyunit/stdlib/resources/refs/resource-specialization.json @@ -0,0 +1,99 @@ + +{ + "version" : "develop", + "url_base" : "http://dist.gem5.org/dist/v22-1", + "previous-versions" : { + "develop" : "https://gem5.googlesource.com/public/gem5-resources/+/refs/heads/develop/resources.json?format=TEXT", + "21.2" : "http://resources.gem5.org/prev-resources-json/resources-21-2.json" + }, + "resources": [ + { + "type" : "kernel", + "name" : "kernel-example", + "documentation" : "kernel-example documentation.", + "architecture" : "RISCV", + "is_zipped" : false, + "md5sum" : "60a53c7d47d7057436bf4b9df707a841", + "url" : "{url_base}/kernels/x86/static/vmlinux-5.4.49", + "source" : "src/linux-kernel" + }, + { + "type" : "disk-image", + "name" : "disk-image-example", + "documentation" : "disk-image documentation.", + "architecture" : "X86", + "is_zipped" : true, + "md5sum" : 
"90e363abf0ddf22eefa2c7c5c9391c49", + "url" : "{url_base}/images/x86/ubuntu-18-04/x86-ubuntu.img.gz", + "source" : "src/x86-ubuntu", + "root_partition": "1" + }, + { + "type" : "binary", + "name" : "binary-example", + "documentation" : "binary-example documentation.", + "architecture" : "ARM", + "is_zipped" : false, + "md5sum" : "71b2cb004fe2cda4556f0b1a38638af6", + "url" : "{url_base}/test-progs/hello/bin/arm/linux/hello64-static", + "source" : "src/simple" + }, + { + "type" : "bootloader", + "name" : "bootloader-example", + "documentation" : "bootloader documentation.", + "is_zipped" : false, + "md5sum" : "71b2cb004fe2cda4556f0b1a38638af6", + "url" : "{url_base}/test-progs/hello/bin/arm/linux/hello64-static" + }, + { + "type" : "checkpoint", + "name" : "checkpoint-example", + "documentation" : "checkpoint-example documentation.", + "architecture": "RISCV", + "is_zipped" : false, + "md5sum" : "3a57c1bb1077176c4587b8a3bf4f8ace", + "source" : null, + "is_tar_archive" : true, + "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar" + }, + { + "type" : "git", + "name" : "git-example", + "documentation" : null, + "is_zipped" : false, + "is_tar_archive" : true, + "md5sum" : "71b2cb004fe2cda4556f0b1a38638af6", + "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar" + }, + { + "type" : "file", + "name" : "file-example", + "documentation" : null, + "is_zipped" : false, + "md5sum" : "71b2cb004fe2cda4556f0b1a38638af6", + "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar", + "source" : null + }, + { + "type" : "directory", + "name" : "directory-example", + "documentation" : "directory-example documentation.", + "is_zipped" : false, + "md5sum" : "3a57c1bb1077176c4587b8a3bf4f8ace", + "source" : null, + "is_tar_archive" : true, + "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar" + }, + { + "type" : "simpoint", + "name" : "simpoint-example", + "documentation" : "simpoint documentation.", + "is_zipped" : false, + "md5sum" : 
"3a57c1bb1077176c4587b8a3bf4f8ace", + "source" : null, + "is_tar_archive" : true, + "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar" + } + ] +} From a9b69ee055d7b82ba5da0e4dbfaecd42be41de6b Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Thu, 19 Jan 2023 14:30:19 +0000 Subject: [PATCH 204/492] stdlib: Add null/None versioning in resources.json This patch allows for the "version" field in the resources.json file to be `null` (translated to `None` in the Python JSON package) or not declared. In this case the resources.json file will be used regardless of the version the gem5 binary is set to. This is useful for testing purposes. Tests have been updated to utilize this where possible. Change-Id: I9d8ae18cb3e61d58bc822bad30853fa3442cb33f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67337 Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Jason Lowe-Power --- src/python/gem5/resources/downloader.py | 7 +- .../resources/pyunit_workload_checks.py | 99 +++---------------- .../refs/workload-checks-custom-workload.json | 17 ++++ .../resources/refs/workload-checks.json | 40 ++++++++ 4 files changed, 76 insertions(+), 87 deletions(-) create mode 100644 tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json create mode 100644 tests/pyunit/stdlib/resources/refs/workload-checks.json diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py index 0b67ecdebd..4a2ed5d332 100644 --- a/src/python/gem5/resources/downloader.py +++ b/src/python/gem5/resources/downloader.py @@ -154,8 +154,13 @@ def _get_resources_json() -> Dict: # If the current version pulled is not correct, look up the # "previous-versions" field to find the correct one. + # If the resource JSON file does not have a "version" field or it's + # null/None, then we will use this resource JSON file (this is useful for + # testing purposes).
version = _resources_json_version_required() - if to_return["version"] != version: + json_version = None if "version" not in to_return else to_return["version"] + + if json_version and json_version != version: if version in to_return["previous-versions"].keys(): to_return = _get_resources_json_at_path( path=to_return["previous-versions"][version] diff --git a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py index fab0bbfbf1..2bc31f5a3f 100644 --- a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py +++ b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py @@ -25,7 +25,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import unittest -import tempfile import os from gem5.resources.workload import Workload, CustomWorkload @@ -34,7 +33,6 @@ from gem5.resources.resource import ( DiskImageResource, obtain_resource, ) -from gem5.resources.downloader import _resources_json_version_required from typing import Dict @@ -46,33 +44,12 @@ class CustomWorkloadTestSuite(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - file_contents = ( - "{" - + f'"version" : "{_resources_json_version_required()}",' - + """ - "url_base" : "http://dist.gem5.org/dist/v22-0", - "previous-versions" : {}, - "resources": [ - { - "type" : "binary", - "name" : "x86-hello64-static", - "documentation" : "A 'Hello World!' 
binary.", - "architecture" : "X86", - "is_zipped" : false, - "md5sum" : "dbf120338b37153e3334603970cebd8c", - "url" : "{url_base}/test-progs/hello/bin/x86/linux/hello64-static", - "source" : "src/simple" - } - ] -} - """ - ) - file = tempfile.NamedTemporaryFile(mode="w", delete=False) - file.write(file_contents) - file.close() - cls.test_json = file.name - os.environ["GEM5_RESOURCE_JSON"] = cls.test_json + os.environ["GEM5_RESOURCE_JSON"] = os.path.join( + os.path.realpath(os.path.dirname(__file__)), + "refs", + "workload-checks-custom-workload.json", + ) cls.custom_workload = CustomWorkload( function="set_se_binary_workload", @@ -84,9 +61,8 @@ class CustomWorkloadTestSuite(unittest.TestCase): @classmethod def tearDownClass(cls): - # Remove the test json file and unset the environment variable so this - # test does not interfere with others. - os.remove(cls.test_json) + # Unset the environment variable so this test does not interfere with + # others. os.environ["GEM5_RESOURCE_JSON"] def test_get_function_str(self) -> None: @@ -149,67 +125,18 @@ class WorkloadTestSuite(unittest.TestCase): @classmethod def setUpClass(cls): - # In this constructor we create a json file to load then create a test - # workload. 
- file_contents = ( - "{" - + f'"version" : "{_resources_json_version_required()}",' - + """ - "url_base" : "http://dist.gem5.org/dist/v22-0", - "previous-versions" : {}, - "resources": [ - { - "type" : "kernel", - "name" : "x86-linux-kernel-5.2.3", - "documentation" : "The linux kernel (v5.2.3), compiled to X86.", - "architecture" : "X86", - "is_zipped" : false, - "md5sum" : "4838c99b77d33c8307b939c16624e4ac", - "url" : "{url_base}/kernels/x86/static/vmlinux-5.2.3", - "source" : "src/linux-kernel" - }, - { - "type" : "disk-image", - "name" : "x86-ubuntu-18.04-img", - "documentation" : "A disk image containing Ubuntu 18.04 for x86..", - "architecture" : "X86", - "is_zipped" : true, - "md5sum" : "90e363abf0ddf22eefa2c7c5c9391c49", - "url" : "{url_base}/images/x86/ubuntu-18-04/x86-ubuntu.img.gz", - "source" : "src/x86-ubuntu", - "root_partition": "1" - }, - { - "type" : "workload", - "name" : "simple-boot", - "documentation" : "Description of workload here", - "function" : "set_kernel_disk_workload", - "resources" : { - "kernel" : "x86-linux-kernel-5.2.3", - "disk_image" : "x86-ubuntu-18.04-img" - }, - "additional_params" : { - "readfile_contents" : "echo 'Boot successful'; m5 exit" - } - } - ] -} - """ + os.environ["GEM5_RESOURCE_JSON"] = os.path.join( + os.path.realpath(os.path.dirname(__file__)), + "refs", + "workload-checks.json", ) - file = tempfile.NamedTemporaryFile(mode="w", delete=False) - file.write(file_contents) - file.close() - - cls.test_json = file.name - os.environ["GEM5_RESOURCE_JSON"] = cls.test_json cls.workload = Workload("simple-boot") @classmethod def tearDownClass(cls): - # Remove the test json file and unset the environment variable so this - # test does not interfere with others. - os.remove(cls.test_json) + # Unset the environment variable so this test does not interfere with + # others. 
os.environ["GEM5_RESOURCE_JSON"] def test_get_function_str(self) -> None: diff --git a/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json b/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json new file mode 100644 index 0000000000..a9dd2aaa46 --- /dev/null +++ b/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json @@ -0,0 +1,17 @@ +{ + "version" : null, + "url_base" : "http://dist.gem5.org/dist/v22-0", + "previous-versions" : {}, + "resources": [ + { + "type" : "binary", + "name" : "x86-hello64-static", + "documentation" : "A 'Hello World!' binary.", + "architecture" : "X86", + "is_zipped" : false, + "md5sum" : "dbf120338b37153e3334603970cebd8c", + "url" : "{url_base}/test-progs/hello/bin/x86/linux/hello64-static", + "source" : "src/simple" + } + ] +} diff --git a/tests/pyunit/stdlib/resources/refs/workload-checks.json b/tests/pyunit/stdlib/resources/refs/workload-checks.json new file mode 100644 index 0000000000..4f7e76bfb5 --- /dev/null +++ b/tests/pyunit/stdlib/resources/refs/workload-checks.json @@ -0,0 +1,40 @@ +{ + "url_base" : "http://dist.gem5.org/dist/v22-0", + "previous-versions" : {}, + "resources": [ + { + "type" : "kernel", + "name" : "x86-linux-kernel-5.2.3", + "documentation" : "The linux kernel (v5.2.3), compiled to X86.", + "architecture" : "X86", + "is_zipped" : false, + "md5sum" : "4838c99b77d33c8307b939c16624e4ac", + "url" : "{url_base}/kernels/x86/static/vmlinux-5.2.3", + "source" : "src/linux-kernel" + }, + { + "type" : "disk-image", + "name" : "x86-ubuntu-18.04-img", + "documentation" : "A disk image containing Ubuntu 18.04 for x86..", + "architecture" : "X86", + "is_zipped" : true, + "md5sum" : "90e363abf0ddf22eefa2c7c5c9391c49", + "url" : "{url_base}/images/x86/ubuntu-18-04/x86-ubuntu.img.gz", + "source" : "src/x86-ubuntu", + "root_partition": "1" + }, + { + "type" : "workload", + "name" : "simple-boot", + "documentation" : "Description of workload here", + "function" : 
"set_kernel_disk_workload", + "resources" : { + "kernel" : "x86-linux-kernel-5.2.3", + "disk_image" : "x86-ubuntu-18.04-img" + }, + "additional_params" : { + "readfile_contents" : "echo 'Boot successful'; m5 exit" + } + } + ] +} From cc838d72a6aad837397a4e3c6f8fac9943267767 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Fri, 20 Jan 2023 00:34:09 +0000 Subject: [PATCH 205/492] stdlib: Update resources to have downloads optional With this patch, when a resource entry does not specify a "url" field, there is no file downloaded. This is necessary infrastructure for gem5-resources which do not have specific files/directories to be downloaded but exist solely in the resources.json file. Change-Id: I0d92e830bfcef750119078b8c226b0659ba7f6cb Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67338 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- src/python/gem5/resources/resource.py | 82 ++++++++++++++------------- 1 file changed, 43 insertions(+), 39 deletions(-) diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py index e4873d689d..0987453c9a 100644 --- a/src/python/gem5/resources/resource.py +++ b/src/python/gem5/resources/resource.py @@ -66,7 +66,7 @@ class AbstractResource: def __init__( self, - local_path: str, + local_path: Optional[str] = None, documentation: Optional[str] = None, source: Optional[str] = None, ): @@ -80,7 +80,7 @@ class AbstractResource: may be found. Not a required parameter. By default is None. """ - if not os.path.exists(local_path): + if local_path and not os.path.exists(local_path): raise Exception( f"Local path specified for resource, '{local_path}', does not " "exist." 
@@ -90,7 +90,7 @@ class AbstractResource: self._documentation = documentation self._source = source - def get_local_path(self) -> str: + def get_local_path(self) -> Optional[str]: """Returns the local path of the resource.""" return self._local_path @@ -322,45 +322,49 @@ def obtain_resource( thrown. True by default. """ - # If the `resource_directory` parameter is not set via this function, we - # check the "GEM5_RESOURCE_DIR" environment variable. If this too is not - # set we call `_get_default_resource_dir()` to determine where the - # resource directory is, or should be, located. - if resource_directory == None: - resource_directory = os.getenv( - "GEM5_RESOURCE_DIR", _get_default_resource_dir() - ) - - # Small checks here to ensure the resource directory is valid. - if os.path.exists(resource_directory): - if not os.path.isdir(resource_directory): - raise Exception( - "gem5 resource directory, " - "'{}', exists but is not a directory".format( - resource_directory - ) - ) - else: - # `exist_ok=True` here as, occasionally, if multiple instance of - # gem5 are started simultaneously, a race condition can exist to - # create the resource directory. Without `exit_ok=True`, threads - # which lose this race will thrown a `FileExistsError` exception. - # `exit_ok=True` ensures no exception is thrown. - os.makedirs(resource_directory, exist_ok=True) - - # This is the path to which the resource is to be stored. - to_path = os.path.join(resource_directory, resource_name) - - # Download the resource if it does not already exist. - get_resource( - resource_name=resource_name, - to_path=os.path.join(resource_directory, resource_name), - download_md5_mismatch=download_md5_mismatch, - ) - # Obtain the JSON resource entry for this resource resource_json = get_resources_json_obj(resource_name) + to_path = None + # If the "url" field is specified, the resoruce must be downloaded. 
+ if "url" in resource_json and resource_json["url"]: + + # If the `resource_directory` parameter is not set via this function, we + # check the "GEM5_RESOURCE_DIR" environment variable. If this too is not + # set we call `_get_default_resource_dir()` to determine where the + # resource directory is, or should be, located. + if resource_directory == None: + resource_directory = os.getenv( + "GEM5_RESOURCE_DIR", _get_default_resource_dir() + ) + + # Small checks here to ensure the resource directory is valid. + if os.path.exists(resource_directory): + if not os.path.isdir(resource_directory): + raise Exception( + "gem5 resource directory, " + "'{}', exists but is not a directory".format( + resource_directory + ) + ) + else: + # `exist_ok=True` here as, occasionally, if multiple instance of + # gem5 are started simultaneously, a race condition can exist to + # create the resource directory. Without `exit_ok=True`, threads + # which lose this race will thrown a `FileExistsError` exception. + # `exit_ok=True` ensures no exception is thrown. + os.makedirs(resource_directory, exist_ok=True) + + # This is the path to which the resource is to be stored. + to_path = os.path.join(resource_directory, resource_name) + + # Download the resource if it does not already exist. + get_resource( + resource_name=resource_name, + to_path=os.path.join(resource_directory, resource_name), + download_md5_mismatch=download_md5_mismatch, + ) + # Obtain the type from the JSON. From this we will determine what subclass # of `AbstractResource` we are to create and return. resources_type = resource_json["type"] From e1601954f052ec51d40ff46f1f14d4b33f4ca556 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Fri, 20 Jan 2023 13:40:22 +0000 Subject: [PATCH 206/492] stdlib: Implement Simpoint Resources This patches does the following: - Adds 'SimpointResource' which encapsulates Simpoint data and functionality. It replaces the old 'gem5.util.simpoint.SimPoint' class. 
Simpoints can be loaded from gem5-resources using the `obtain_resource` function. - Adds 'SimpointDirectoryResource'. This inherits form 'SimpointResource'. While 'SimpointResource' takes raw Simpoint data via parameters, 'SimpointDirectoryResource' assumes the data exists in files, in a directory. - Updates the "configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py" and "configs/example/gem5_library/checkpoints/simpoints-se-restory.py" example files to utilize this new Simpoint resource classes. **Note**: While the old "SimPoint" class ("src/python/gem5/util/simpoint.py") is marked as deprecated, it may be difficult to utilize given updates to the APIs in the gem5 stdlib Cores and Simulator modules. Change-Id: I9bed5c643ffc735838c9f22a58c53547941010e7 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67339 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- .../checkpoints/simpoints-se-checkpoint.py | 19 +- .../checkpoints/simpoints-se-restore.py | 27 ++- .../components/boards/se_binary_workload.py | 27 +-- src/python/gem5/resources/resource.py | 196 +++++++++++++++++- .../gem5/simulate/exit_event_generators.py | 4 +- src/python/gem5/utils/simpoint.py | 17 +- .../test_gem5_library_examples.py | 74 +++---- .../pyunit_resource_specialization.py | 41 ++++ .../refs/resource-specialization.json | 26 ++- 9 files changed, 354 insertions(+), 77 deletions(-) diff --git a/configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py b/configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py index d2d1af730f..b5eb7e9912 100644 --- a/configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py +++ b/configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py @@ -58,6 +58,7 @@ from gem5.components.processors.simple_processor import SimpleProcessor from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA from gem5.resources.workload import Workload +from 
gem5.resources.resource import obtain_resource, SimpointResource from pathlib import Path from gem5.components.cachehierarchies.classic.no_cache import NoCache from gem5.simulate.exit_event_generators import ( @@ -108,7 +109,23 @@ board = SimpleBoard( cache_hierarchy=cache_hierarchy, ) -board.set_workload(Workload("x86-print-this-15000-with-simpoints")) +# board.set_workload( +# Workload("x86-print-this-15000-with-simpoints") +# +# **Note: This has been removed until we update the resources.json file to +# encapsulate the new Simpoint format. +# Below we set the simpount manually. + +board.set_se_simpoint_workload( + binary=obtain_resource("x86-print-this"), + arguments=["print this", 15000], + simpoint=SimpointResource( + simpoint_interval=1000000, + simpoint_list=[2, 3, 4, 15], + weight_list=[0.1, 0.2, 0.4, 0.3], + warmup_interval=1000000, + ), +) dir = Path(args.checkpoint_path) dir.mkdir(exist_ok=True) diff --git a/configs/example/gem5_library/checkpoints/simpoints-se-restore.py b/configs/example/gem5_library/checkpoints/simpoints-se-restore.py index f8f48d0ec1..5ff82dba04 100644 --- a/configs/example/gem5_library/checkpoints/simpoints-se-restore.py +++ b/configs/example/gem5_library/checkpoints/simpoints-se-restore.py @@ -63,8 +63,9 @@ from gem5.components.memory import DualChannelDDR4_2400 from gem5.components.processors.simple_processor import SimpleProcessor from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA -from gem5.resources.resource import Resource +from gem5.resources.resource import SimpointResource, obtain_resource from gem5.resources.workload import Workload +from gem5.resources.resource import SimpointResource from pathlib import Path from m5.stats import reset, dump @@ -96,11 +97,29 @@ board = SimpleBoard( cache_hierarchy=cache_hierarchy, ) -# Here we obtain the workloadfrom gem5 resources, the checkpoint in this +# Here we obtain the workload from gem5 resources, the checkpoint in this # workload was generated 
from # `configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py`. -board.set_workload( - Workload("x86-print-this-15000-with-simpoints-and-checkpoint") +# board.set_workload( +# Workload("x86-print-this-15000-with-simpoints-and-checkpoint") +# +# **Note: This has been removed until we update the resources.json file to +# encapsulate the new Simpoint format. +# Below we set the simpount manually. +# +# This loads a single checkpoint as an example of using simpoints to simulate +# the function of a single simpoint region. + +board.set_se_simpoint_workload( + binary=obtain_resource("x86-print-this"), + arguments=["print this", 15000], + simpoint=SimpointResource( + simpoint_interval=1000000, + simpoint_list=[2, 3, 4, 15], + weight_list=[0.1, 0.2, 0.4, 0.3], + warmup_interval=1000000, + ), + checkpoint=obtain_resource("simpoints-se-checkpoints-v22-1-v2"), ) diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py index acedfaf9a9..31931106c9 100644 --- a/src/python/gem5/components/boards/se_binary_workload.py +++ b/src/python/gem5/components/boards/se_binary_workload.py @@ -31,8 +31,8 @@ from ...resources.resource import ( BinaryResource, CheckpointResource, SimpointResource, + SimpointDirectoryResource, ) -from gem5.utils.simpoint import SimPoint from m5.objects import SEWorkload, Process @@ -125,7 +125,7 @@ class SEBinaryWorkload: self, binary: BinaryResource, arguments: List[str] = [], - simpoint: Union[SimpointResource, SimPoint] = None, + simpoint: SimpointResource = None, checkpoint: Optional[Union[Path, CheckpointResource]] = None, ) -> None: """Set up the system to run a SimPoint workload. @@ -135,28 +135,23 @@ class SEBinaryWorkload: * Dynamically linked executables are partially supported when the host ISA and the simulated ISA are the same. 
- **Warning:** SimPoints only works with one core + **Warning:** Simpoints only works with one core :param binary: The resource encapsulating the binary to be run. :param arguments: The input arguments for the binary - :param simpoint: The SimPoint object or Resource that contains the list of + :param simpoint: The SimpointResource that contains the list of SimPoints starting instructions, the list of weights, and the SimPoints interval :param checkpoint: The checkpoint directory. Used to restore the simulation to that checkpoint. """ - # convert input to SimPoint if necessary - if isinstance(simpoint, SimpointResource): - self._simpoint_object = SimPoint(simpoint) - else: - assert isinstance(simpoint, SimPoint) - self._simpoint_object = simpoint + self._simpoint_resource = simpoint if self.get_processor().get_num_cores() > 1: warn("SimPoints only works with one core") self.get_processor().get_cores()[0]._set_simpoint( - inst_starts=self._simpoint_object.get_simpoint_start_insts(), + inst_starts=self._simpoint_resource.get_simpoint_start_insts(), board_initialized=False, ) @@ -167,11 +162,11 @@ class SEBinaryWorkload: checkpoint=checkpoint, ) - def get_simpoint(self) -> SimPoint: + def get_simpoint(self) -> SimpointResource: """ - Returns the SimPoint object set. If no SimPoint object has been set an - exception is thrown. + Returns the SimpointResorce object set. If no SimpointResource object + has been set an exception is thrown. 
""" - if getattr(self, "_simpoint_object", None): - return self._simpoint_object + if getattr(self, "_simpoint_resource", None): + return self._simpoint_resource raise Exception("This board does not have a simpoint set.") diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py index 0987453c9a..678497eaa7 100644 --- a/src/python/gem5/resources/resource.py +++ b/src/python/gem5/resources/resource.py @@ -27,13 +27,13 @@ from abc import ABCMeta import os from pathlib import Path -from m5.util import warn +from m5.util import warn, fatal from .downloader import get_resource, get_resources_json_obj from ..isas import ISA, get_isa_from_str -from typing import Optional, Dict, Union, Type +from typing import Optional, Dict, Union, Type, Tuple, List """ Resources are items needed to run a simulation, such as a disk image, kernel, @@ -72,7 +72,7 @@ class AbstractResource: ): """ :param local_path: The path on the host system where this resource is - located + located. :param documentation: Documentation describing this resource. Not a required parameter. By default is None. :param source: The source (as in "source code") for this resource. This @@ -280,22 +280,205 @@ class CheckpointResource(DirectoryResource): ) -class SimpointResource(DirectoryResource): - """A simpoint resource.""" +class SimpointResource(AbstractResource): + """A simpoint resource. This resource stores all information required to + perform a Simpoint creation and restore. It contains the Simpoint, the + Simpoint interval, the weight for each Simpoint, the full warmup length, + and the warmup length for each Simpoint. 
+ """ def __init__( self, - local_path: str, + simpoint_interval: int = None, + simpoint_list: List[int] = None, + weight_list: List[float] = None, + warmup_interval: int = 0, + workload_name: Optional[str] = None, documentation: Optional[str] = None, source: Optional[str] = None, + local_path: Optional[str] = None, **kwargs, ): + """ + :param simpoint_interval: The simpoint interval. + :param simpoint_list: The simpoint list. + :param weight_list: The weight list. + :param warmup_interval: The warmup interval. Default to zero (a value + of zero means effectively not set). + :param workload_name: Simpoints are typically associated with a + particular workload due to their dependency on chosen input parameters. + This field helps backtrack to that resource if required. This should + relate to a workload "name" field in the resource.json file. + """ + super().__init__( local_path=local_path, documentation=documentation, source=source, ) + self._weight_list = weight_list + self._simpoint_list = simpoint_list + self._simpoint_interval = simpoint_interval + self._warmup_interval = warmup_interval + self._workload_name = workload_name + + self._simpoint_start_insts = list( + inst * simpoint_interval for inst in self.get_simpoint_list() + ) + + if self._warmup_interval != 0: + self._warmup_list = self._set_warmup_list() + else: + self._warmup_list = [0] * len(self.get_simpoint_start_insts) + + def get_simpoint_list(self) -> List[int]: + """Returns the a list containing all the Simpoints for the workload.""" + return self._simpoint_list + + def get_simpoint_start_insts(self) -> List[int]: + """Returns a lst containing all the Simpoint starting instrunction + points for the workload. 
This was calculated by multiplying the + Simpoint with the Simpoint interval when it was generated.""" + return self._simpoint_start_insts + + def get_warmup_interval(self) -> int: + """Returns the instruction length of the warmup interval.""" + return self._warmup_interval + + def get_weight_list(self) -> List[float]: + """Returns the list that contains the weight for each Simpoint. The + order of the weights matches that of the list returned by + `get_simpoint_list(). I.e. `get_weight_list()[3]` is the weight for + simpoint `get_simpoint_list()[3]`.""" + return self._weight_list + + def get_simpoint_interval(self) -> int: + """Returns the Simpoint interval value.""" + return self._simpoint_interval + + def get_warmup_list(self) -> List[int]: + """Returns the a list containing the warmup length for each Simpoint. + Each warmup length in this list corresponds to the Simpoint at the same + index in `get_simpoint_list()`. I.e., `get_warmup_list()[4]` is the + warmup length for Simpoint `get_simpoint_list()[4]`.""" + return self._warmup_list + + def get_workload_name(self) -> Optional[str]: + """Return the workload name this Simpoint is associated with.""" + return self._workload_name + + def _set_warmup_list(self) -> List[int]: + """ + This function uses the warmup_interval, fits it into the + simpoint_start_insts, and outputs a list of warmup instruction lengths + for each SimPoint. + + The warmup instruction length is calculated using the starting + instruction of a SimPoint to minus the warmup_interval and the ending + instruction of the last SimPoint. If it is less than 0, then the warmup + instruction length is the gap between the starting instruction of a + SimPoint and the ending instruction of the last SimPoint. 
+ """ + warmup_list = [] + for index, start_inst in enumerate(self.get_simpoint_start_insts()): + warmup_inst = start_inst - self.get_warmup_interval() + if warmup_inst < 0: + warmup_inst = start_inst + else: + warmup_inst = self.get_warmup_interval() + warmup_list.append(warmup_inst) + # change the starting instruction of a SimPoint to include the + # warmup instruction length + self._simpoint_start_insts[index] = start_inst - warmup_inst + return warmup_list + + +class SimpointDirectoryResource(SimpointResource): + """A Simpoint diretory resource. This Simpoint Resource assumes the + existance of a directory containing a simpoint file and a weight file.""" + + def __init__( + self, + local_path: str, + simpoint_file: str, + weight_file: str, + simpoint_interval: int, + warmup_interval: int, + workload_name: Optional[str] = None, + documentation: Optional[str] = None, + source: Optional[str] = None, + **kwargs, + ): + """ + :param simpoint_file: The Simpoint file. This file is a list of + Simpoints, each on its own line. It should map 1-to-1 to the weights + file. + :param weight_file: The Simpoint weights file. This file is a list of + weights, each on its own line. + """ + self._simpoint_file = simpoint_file + self._weight_file = weight_file + + # This is a little hack. The functions `get_simpoint_file` and + # `get_weight_file` use the local path, so we set it here despite it + # also being set in the `AbstractResource` constructor. This isn't + # elegant but does not harm. 
+ self._local_path = local_path + ( + simpoint_list, + weight_list, + ) = self._get_weights_and_simpoints_from_file() + + super().__init__( + simpoint_interval=simpoint_interval, + simpoint_list=simpoint_list, + weight_list=weight_list, + warmup_interval=warmup_interval, + workload_name=workload_name, + local_path=local_path, + documentation=documentation, + source=source, + ) + + def get_simpoint_file(self) -> Path: + """Return the Simpoint File path.""" + return Path(Path(self._local_path) / self._simpoint_file) + + def get_weight_file(self) -> Path: + """Returns the Weight File path.""" + return Path(Path(self._local_path) / self._weight_file) + + def _get_weights_and_simpoints_from_file( + self, + ) -> Tuple[List[int], List[int]]: + """This is a helper function to extract the weights and simpoints from + the files. + """ + simpoint_weight_pair = [] + with open(self.get_simpoint_file()) as simpoint_file, open( + self.get_weight_file() + ) as weight_file: + while True: + line = simpoint_file.readline() + if not line: + break + interval = int(line.split(" ", 1)[0]) + line = weight_file.readline() + if not line: + fatal("not engough weights") + weight = float(line.split(" ", 1)[0]) + simpoint_weight_pair.append((interval, weight)) + simpoint_weight_pair.sort(key=lambda obj: obj[0]) + # use simpoint to sort + + weight_list = [] + simpoint_list = [] + for simpoint, weight in simpoint_weight_pair: + simpoint_list.append(simpoint) + weight_list.append(weight) + return simpoint_list, weight_list + def obtain_resource( resource_name: str, @@ -529,5 +712,6 @@ _get_resource_json_type_map = { "file": FileResource, "directory": DirectoryResource, "simpoint": SimpointResource, + "simpoint-directory": SimpointDirectoryResource, "resource": Resource, } diff --git a/src/python/gem5/simulate/exit_event_generators.py b/src/python/gem5/simulate/exit_event_generators.py index d6732bb49d..738e1281d9 100644 --- a/src/python/gem5/simulate/exit_event_generators.py +++ 
b/src/python/gem5/simulate/exit_event_generators.py @@ -28,7 +28,7 @@ from typing import Generator, Optional import m5.stats from ..components.processors.abstract_processor import AbstractProcessor from ..components.processors.switchable_processor import SwitchableProcessor -from ..utils.simpoint import SimPoint +from ..resources.resource import SimpointResource from m5.util import warn from pathlib import Path @@ -134,7 +134,7 @@ def skip_generator(): def simpoints_save_checkpoint_generator( - checkpoint_dir: Path, simpoint: SimPoint + checkpoint_dir: Path, simpoint: SimpointResource ): """ A generator for taking multiple checkpoints for SimPoints. It will save the diff --git a/src/python/gem5/utils/simpoint.py b/src/python/gem5/utils/simpoint.py index 9e861cc0a5..eab92e2291 100644 --- a/src/python/gem5/utils/simpoint.py +++ b/src/python/gem5/utils/simpoint.py @@ -24,10 +24,10 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from m5.util import fatal +from m5.util import fatal, warn from pathlib import Path from typing import List, Tuple -from gem5.resources.resource import Resource, CustomResource +from gem5.resources.resource import SimpointResource class SimPoint: @@ -39,7 +39,7 @@ class SimPoint: def __init__( self, - simpoint_resource: CustomResource = None, + simpoint_resource: SimpointResource = None, simpoint_interval: int = None, simpoint_file_path: Path = None, weight_file_path: Path = None, @@ -70,12 +70,19 @@ class SimPoint: The warmup_list only works correctly with sorted simpoint_list. """ + warn( + "This `SimPoint` class has been deprecated in favor of " + "`SimpointResource` and `SimpointDirectory` resource which may be " + "found in `gem5.resources.resource`. Please utilize these. This " + "`SimPoint` class will be removed in future releases of gem5." 
+ ) + # initalize input if you're passing in a CustomResource if simpoint_resource is not None: simpoint_directory = str(simpoint_resource.get_local_path()) - simpoint_file_path = Path(simpoint_directory + "/simpoint.simpt") - weight_file_path = Path(simpoint_directory + "/simpoint.weight") + simpoint_file_path = simpoint_directory.get_simpoint_file() + weight_file_path = simpoint_resource.get_weight_file() simpoint_interval = ( simpoint_resource.get_metadata() .get("additional_metadata") diff --git a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py index 514894f8d2..9b5c2c67ff 100644 --- a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py +++ b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py @@ -94,44 +94,44 @@ gem5_verify_config( length=constants.quick_tag, ) -# gem5_verify_config( -# name="test-simpoints-se-checkpoint", -# fixtures=(), -# verifiers=(), -# config=joinpath( -# config.base_dir, -# "configs", -# "example", -# "gem5_library", -# "checkpoints", -# "simpoints-se-checkpoint.py", -# ), -# config_args=[ -# "--checkpoint-path", -# joinpath(resource_path, "se_checkpoint_folder-save"), -# ], -# valid_isas=(constants.all_compiled_tag,), -# valid_hosts=constants.supported_hosts, -# length=constants.quick_tag, -# ) +gem5_verify_config( + name="test-simpoints-se-checkpoint", + fixtures=(), + verifiers=(), + config=joinpath( + config.base_dir, + "configs", + "example", + "gem5_library", + "checkpoints", + "simpoints-se-checkpoint.py", + ), + config_args=[ + "--checkpoint-path", + joinpath(resource_path, "se_checkpoint_folder-save"), + ], + valid_isas=(constants.all_compiled_tag,), + valid_hosts=constants.supported_hosts, + length=constants.quick_tag, +) -# gem5_verify_config( -# name="test-simpoints-se-restore", -# fixtures=(), -# verifiers=(), -# config=joinpath( -# config.base_dir, -# "configs", -# "example", -# "gem5_library", -# 
"checkpoints", -# "simpoints-se-restore.py", -# ), -# config_args=[], -# valid_isas=(constants.all_compiled_tag,), -# valid_hosts=constants.supported_hosts, -# length=constants.quick_tag, -# ) +gem5_verify_config( + name="test-simpoints-se-restore", + fixtures=(), + verifiers=(), + config=joinpath( + config.base_dir, + "configs", + "example", + "gem5_library", + "checkpoints", + "simpoints-se-restore.py", + ), + config_args=[], + valid_isas=(constants.all_compiled_tag,), + valid_hosts=constants.supported_hosts, + length=constants.quick_tag, +) if os.access("/dev/kvm", mode=os.R_OK | os.W_OK): # The x86-ubuntu-run uses KVM cores, this test will therefore only be run diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py index e0a8dddd07..f31e35d719 100644 --- a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py +++ b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py @@ -26,6 +26,7 @@ import os import unittest +from pathlib import Path from gem5.resources.resource import * from gem5.isas import ISA @@ -155,6 +156,40 @@ class ResourceSpecializationSuite(unittest.TestCase): self.assertIsNone(resource.get_documentation()) self.assertIsNone(resource.get_source()) + def test_simpoint_directory_resource(self) -> None: + """Tests the loading of a Simpoint directory resource.""" + resource = obtain_resource( + resource_name="simpoint-directory-example", + resource_directory=self.get_resource_dir(), + ) + + self.assertIsInstance(resource, SimpointDirectoryResource) + + self.assertEquals( + "simpoint directory documentation.", resource.get_documentation() + ) + self.assertIsNone(resource.get_source()) + + self.assertEquals(1000000, resource.get_simpoint_interval()) + self.assertEquals(1000000, resource.get_warmup_interval()) + self.assertEquals( + Path( + Path(self.get_resource_dir()) + / "simpoint-directory-example" + / "simpoint.simpt" + ), + 
resource.get_simpoint_file(), + ) + self.assertEquals( + Path( + Path(self.get_resource_dir()) + / "simpoint-directory-example" + / "simpoint.weight" + ), + resource.get_weight_file(), + ) + self.assertEquals("Example Workload", resource.get_workload_name()) + def test_simpoint_resource(self) -> None: """Tests the loading of a Simpoint resource.""" resource = obtain_resource( @@ -168,6 +203,12 @@ class ResourceSpecializationSuite(unittest.TestCase): "simpoint documentation.", resource.get_documentation() ) self.assertIsNone(resource.get_source()) + self.assertIsNone(resource.get_local_path()) + + self.assertEquals(1000000, resource.get_simpoint_interval()) + self.assertEquals(23445, resource.get_warmup_interval()) + self.assertEquals([2, 3, 4, 15], resource.get_simpoint_list()) + self.assertEquals([0.1, 0.2, 0.4, 0.3], resource.get_weight_list()) def test_file_resource(self) -> None: """Tests the loading of a FileResource.""" diff --git a/tests/pyunit/stdlib/resources/refs/resource-specialization.json b/tests/pyunit/stdlib/resources/refs/resource-specialization.json index 77ffc10705..01671b564b 100644 --- a/tests/pyunit/stdlib/resources/refs/resource-specialization.json +++ b/tests/pyunit/stdlib/resources/refs/resource-specialization.json @@ -86,14 +86,28 @@ "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar" }, { - "type" : "simpoint", - "name" : "simpoint-example", - "documentation" : "simpoint documentation.", + "type": "simpoint-directory", + "name": "simpoint-directory-example", + "documentation": "simpoint directory documentation.", "is_zipped" : false, - "md5sum" : "3a57c1bb1077176c4587b8a3bf4f8ace", + "md5sum" : "3fcffe3956c8a95e3fb82e232e2b41fb", "source" : null, "is_tar_archive" : true, - "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar" - } + "url": "{url_base}/simpoints/x86-print-this-15000-simpoints-20221013.tar", + "simpoint_interval": 1000000, + "warmup_interval": 1000000, + "simpoint_file": "simpoint.simpt", + 
"weight_file": "simpoint.weight", + "workload_name": "Example Workload" + }, + { + "type": "simpoint", + "name": "simpoint-example", + "documentation": "simpoint documentation.", + "simpoint_interval": 1000000, + "warmup_interval": 23445, + "simpoint_list" : [2,3,4,15], + "weight_list" : [0.1, 0.2, 0.4, 0.3] + } ] } From 717d3b239cbc7a8371bfd69b4d67fc7c57b3f104 Mon Sep 17 00:00:00 2001 From: Zhantong Qiu Date: Fri, 6 Jan 2023 16:11:57 -0800 Subject: [PATCH 207/492] base,python: Added PcCountPair type and parameter This commit introduces a PcCountPair type that stores a Program Counter address and an integer of counts for the Program Counter address. The PcCountPair can be used in the same way and hashable in both C++ and Python. Change-Id: I66d93e2c6a1d286cb9dd795ba97f8d887f67d503 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67193 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- src/cpu/probes/pc_count_pair.hh | 99 +++++++++++++++++++++++++++++++++ src/python/m5/params.py | 41 ++++++++++++++ src/python/pybind11/core.cc | 27 +++++++++ 3 files changed, 167 insertions(+) create mode 100644 src/cpu/probes/pc_count_pair.hh diff --git a/src/cpu/probes/pc_count_pair.hh b/src/cpu/probes/pc_count_pair.hh new file mode 100644 index 0000000000..fd6bc639fe --- /dev/null +++ b/src/cpu/probes/pc_count_pair.hh @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2023 The Regents of the University of California. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __PC_COUNT_PAIR_HH__ +#define __PC_COUNT_PAIR_HH__ + +#include "base/types.hh" + +namespace gem5 +{ + +class PcCountPair +{ + + private: + + /** The Program Counter address */ + Addr pc; + /** The count of the Program Counter address */ + int count; + + public: + + /** Explicit constructor assigning the pc and count values */ + explicit constexpr PcCountPair(Addr _pc, int _count) : + pc(_pc), count(_count) {} + + /** Default constructor for parameter classes */ + PcCountPair() : pc(0), count(0) {} + + /** Returns the Program Counter address */ + constexpr Addr getPC() const { return pc; } + /** Returns the count of the Program */ + constexpr int getCount() const { return count; } + + /** Greater than comparison */ + constexpr bool + operator>(const PcCountPair& cc) const + { + return count > cc.getCount(); + } + + /** Equal comparison */ + constexpr bool + operator==(const PcCountPair& cc) const + { + return (pc == cc.getPC() && count == cc.getCount()); + } + + /** String format */ + std::string + to_string() const + { + std::string s = "(" + std::to_string(pc) + + "," + std::to_string(count) + ")"; + return s; + } + + /** Enable hashing for this parameter */ + struct HashFunction + { + size_t operator()(const PcCountPair& item) const + { + size_t xHash = std::hash()(item.pc); + size_t yHash = std::hash()(item.count); + return xHash * 2 + yHash; + } + }; + +}; + +} // namespace gem5 + +#endif // __PC_COUNT_PAIR_HH__ diff --git a/src/python/m5/params.py b/src/python/m5/params.py index e76380bc40..92e913b2f0 100644 --- a/src/python/m5/params.py +++ b/src/python/m5/params.py @@ -854,6 +854,46 @@ class Addr(CheckedInt): return "0x%x" % int(val) +class PcCountPair(ParamValue): + # This parameter stores a Program Counter address and the a count value for + # the Program Counter address + cxx_type = "PcCountPair" + cmd_line_settable = True + + def __init__(self, _pc, _count): + self.pc = _pc + self.count = _count + + def get_pc(self): + return self.pc + + 
def get_count(self): + return self.count + + def getValue(self): + # convert Python PcCountPair into C++ PcCountPair + from _m5.pc import PcCountPair + + return PcCountPair(self.pc, self.count) + + def __str__(self): + return "(%i,%i)" % (self.pc, self.count) + + def __eq__(self, other): + return self.pc == other.get_pc() and self.count == other.get_count() + + def __hash__(self): + return hash((int(self.pc), int(self.count))) + + @classmethod + def cxx_predecls(cls, code): + code('#include "cpu/probes/pc_count_pair.hh"') + + @classmethod + def pybind_predecls(cls, code): + code('#include "cpu/probes/pc_count_pair.hh"') + + class AddrRange(ParamValue): cxx_type = "AddrRange" @@ -2426,4 +2466,5 @@ __all__ = [ "VectorMasterPort", "VectorSlavePort", "DeprecatedParam", + "PcCountPair", ] diff --git a/src/python/pybind11/core.cc b/src/python/pybind11/core.cc index 89466750d0..bd83a74331 100644 --- a/src/python/pybind11/core.cc +++ b/src/python/pybind11/core.cc @@ -58,6 +58,7 @@ #include "sim/drain.hh" #include "sim/serialize.hh" #include "sim/sim_object.hh" +#include "cpu/probes/pc_count_pair.hh" namespace py = pybind11; @@ -163,6 +164,31 @@ init_range(py::module_ &m_native) m.def("RangeSize", &RangeSize); } +static void +init_pc(py::module_ &m_native) +{ + py::module_ m = m_native.def_submodule("pc"); + py::class_(m, "PcCountPair") + .def(py::init<>()) + .def(py::init()) + .def("__eq__", [](const PcCountPair& self, py::object other) { + py::int_ pyPC = other.attr("get_pc")(); + py::int_ pyCount = other.attr("get_count")(); + uint64_t cPC = pyPC.cast(); + int cCount = pyCount.cast(); + return (self.getPC() == cPC && self.getCount() == cCount); + }) + .def("__hash__", [](const PcCountPair& self){ + py::int_ pyPC = py::cast(self.getPC()); + py::int_ pyCount = py::cast(self.getCount()); + return py::hash(py::make_tuple(pyPC, pyCount)); + }) + .def("__str__", &PcCountPair::to_string) + .def("get_pc", &PcCountPair::getPC) + .def("get_count", &PcCountPair::getCount) + ; +} + 
static void init_net(py::module_ &m_native) { @@ -307,6 +333,7 @@ pybind_init_core(py::module_ &m_native) init_range(m_native); init_net(m_native); init_loader(m_native); + init_pc(m_native); } } // namespace gem5 From 0d129a6bf2e6b9f7d3ec102376151e8752a3afde Mon Sep 17 00:00:00 2001 From: Zhantong Qiu Date: Fri, 6 Jan 2023 16:58:06 -0800 Subject: [PATCH 208/492] sim: Added PcCountTracker and PcCountTrackerManager PcCountTracker is a probe listener that connects to one core and listens for a list of Program Counter addresses (PCs). It notifies the PcCountTrackerManager every time it encounters a Program Counter address in the list. PcCountTrackerManager is a SimObject that is responsible for keeping track of a list of PC-count pairs and the number of times a particular PC has been executed globally. This patch adds a way to track the number of times a set of specific PCs have been executed. Change-Id: I8f47bfa7e29aa2bb6ab817417266033439b85d51 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67194 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- src/cpu/probes/PcCountTracker.py | 64 +++++++++++ src/cpu/probes/SConscript | 37 ++++++ src/cpu/probes/pc_count_tracker.cc | 70 ++++++++++++ src/cpu/probes/pc_count_tracker.hh | 72 ++++++++++++ src/cpu/probes/pc_count_tracker_manager.cc | 88 +++++++++++++++ src/cpu/probes/pc_count_tracker_manager.hh | 124 +++++++++++++++++++++ 6 files changed, 455 insertions(+) create mode 100644 src/cpu/probes/PcCountTracker.py create mode 100644 src/cpu/probes/SConscript create mode 100644 src/cpu/probes/pc_count_tracker.cc create mode 100644 src/cpu/probes/pc_count_tracker.hh create mode 100644 src/cpu/probes/pc_count_tracker_manager.cc create mode 100644 src/cpu/probes/pc_count_tracker_manager.hh diff --git a/src/cpu/probes/PcCountTracker.py b/src/cpu/probes/PcCountTracker.py new file mode 100644 index 0000000000..259ec68f8e --- /dev/null +++ b/src/cpu/probes/PcCountTracker.py @@ -0,0 +1,64 @@ +#
Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.params import * +from m5.util.pybind import * +from m5.objects.Probe import ProbeListenerObject +from m5.objects import SimObject + + +class PcCountTrackerManager(SimObject): + """This class manages global PC-count pair tracking. + It keeps the global counters for all target PC-count pairs and raises exit + events when a PC executed a target number of times. 
+ It gets called every time a PcCountTracker encounters a target PC. + """ + + type = "PcCountTrackerManager" + cxx_header = "cpu/probes/pc_count_tracker_manager.hh" + cxx_class = "gem5::PcCountTrackerManager" + + cxx_exports = [ + PyBindMethod("getPcCount"), + PyBindMethod("getCurrentPcCountPair"), + ] + + targets = VectorParam.PcCountPair("the target PC Count pairs") + + +class PcCountTracker(ProbeListenerObject): + """This probe listener tracks the number of times a particular pc has been + executed. It needs to be connected to a manager to track the global + information. + """ + + type = "PcCountTracker" + cxx_header = "cpu/probes/pc_count_tracker.hh" + cxx_class = "gem5::PcCountTracker" + + targets = VectorParam.PcCountPair("the target PC Count pairs") + core = Param.BaseCPU("the connected cpu") + ptmanager = Param.PcCountTrackerManager("the PcCountTracker manager") diff --git a/src/cpu/probes/SConscript b/src/cpu/probes/SConscript new file mode 100644 index 0000000000..c96ca78a0c --- /dev/null +++ b/src/cpu/probes/SConscript @@ -0,0 +1,37 @@ +# Copyright (c) 2022 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Import("*") + +if not env["CONF"]["USE_NULL_ISA"]: + SimObject( + "PcCountTracker.py", + sim_objects=["PcCountTracker", "PcCountTrackerManager"], + ) + Source("pc_count_tracker.cc") + Source("pc_count_tracker_manager.cc") + + DebugFlag("PcCountTracker") diff --git a/src/cpu/probes/pc_count_tracker.cc b/src/cpu/probes/pc_count_tracker.cc new file mode 100644 index 0000000000..184db9a6a3 --- /dev/null +++ b/src/cpu/probes/pc_count_tracker.cc @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022 The Regents of the University of California. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/probes/pc_count_tracker.hh" + + +namespace gem5 +{ + +PcCountTracker::PcCountTracker(const PcCountTrackerParams &p) + : ProbeListenerObject(p), + cpuptr(p.core), + manager(p.ptmanager) +{ + if (!cpuptr || !manager) { + fatal("%s is NULL", !cpuptr ? 
"CPU": "PcCountTrackerManager"); + } + for (int i = 0; i < p.targets.size(); i++) { + // initialize the set of targeting Program Counter addresses + targetPC.insert(p.targets[i].getPC()); + } +} + +void +PcCountTracker::regProbeListeners() +{ + // connect the probe listener with the probe "RetriedInstsPC" in the + // corresponding core. + // when "RetiredInstsPC" notifies the probe listener, then the function + // 'check_pc' is automatically called + typedef ProbeListenerArg PcCountTrackerListener; + listeners.push_back(new PcCountTrackerListener(this, "RetiredInstsPC", + &PcCountTracker::checkPc)); +} + +void +PcCountTracker::checkPc(const Addr& pc) { + if (targetPC.find(pc) != targetPC.end()) { + // if the PC is one of the target PCs, then notify the + // PcCounterTrackerManager by calling its `check_count` function + manager->checkCount(pc); + } +} + +} // namespace gem5 diff --git a/src/cpu/probes/pc_count_tracker.hh b/src/cpu/probes/pc_count_tracker.hh new file mode 100644 index 0000000000..8f54e1ad72 --- /dev/null +++ b/src/cpu/probes/pc_count_tracker.hh @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2022 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_PROBES_PC_COUNT_TRACKER_HH__ +#define __CPU_PROBES_PC_COUNT_TRACKER_HH__ + +#include + +#include "cpu/probes/pc_count_tracker_manager.hh" +#include "params/PcCountTracker.hh" +#include "sim/probe/probe.hh" + +namespace gem5 +{ + +class PcCountTracker : public ProbeListenerObject +{ + public: + PcCountTracker(const PcCountTrackerParams ¶ms); + + /** setup the probelistener */ + virtual void regProbeListeners(); + + /** + * this function is called when the probelistener receives signal from the + * probe + * + * @param pc the targeting Program Counter address + */ + void checkPc(const Addr& pc); + + private: + /** + * a set of Program Counter addresses that should notify the + * PcCounterTrackerManager for + */ + std::unordered_set targetPC; + + /** the core this PcCountTracker is tracking at */ + BaseCPU *cpuptr; + + /** the PcCounterTrackerManager */ + PcCountTrackerManager *manager; +}; +} + +#endif // __CPU_PROBES_PC_COUNT_TRACKER_HH__ diff --git a/src/cpu/probes/pc_count_tracker_manager.cc b/src/cpu/probes/pc_count_tracker_manager.cc new file mode 100644 index 0000000000..88d7dda568 --- /dev/null +++ b/src/cpu/probes/pc_count_tracker_manager.cc @@ -0,0 
+1,88 @@ +/* + * Copyright (c) 2022 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/probes/pc_count_tracker_manager.hh" + +namespace gem5 +{ + +PcCountTrackerManager::PcCountTrackerManager( + const PcCountTrackerManagerParams &p) + : SimObject(p) +{ + currentPair = PcCountPair(0,0); + ifListNotEmpty = true; + + for (int i = 0 ; i < p.targets.size() ; i++) { + // initialize the counter for the inputted PC Count pair + // unordered_map does not allow duplicate, so counter won't + // have duplicates + counter.insert(std::make_pair(p.targets[i].getPC(),0)); + // store all the PC Count pair into the targetPair set + targetPair.insert(p.targets[i]); + } + DPRINTF(PcCountTracker, + "total %i PCs in counter\n", counter.size()); + DPRINTF(PcCountTracker, + "all targets: \n%s", printAllTargets()); +} + +void +PcCountTrackerManager::checkCount(Addr pc) +{ + + if(ifListNotEmpty) { + int count = ++counter.find(pc)->second; + // increment the counter of the encountered PC address by 1 + + currentPair = PcCountPair(pc,count); + // update the current PC Count pair + if(targetPair.find(currentPair) != targetPair.end()) { + // if the current PC Count pair is one of the target pairs + DPRINTF(PcCountTracker, + "pc:%s encountered\n", currentPair.to_string()); + + exitSimLoopNow("simpoint starting point found"); + // raise the SIMPOINT_BEGIN exit event + + targetPair.erase(currentPair); + // erase the encountered PC Count pair from the target pairs + DPRINTF(PcCountTracker, + "There are %i targets remained\n", targetPair.size()); + } + + if(targetPair.empty()) { + // if all target PC Count pairs are encountered + DPRINTF(PcCountTracker, + "all targets are encountered.\n"); + ifListNotEmpty = false; + } + } +} + +} diff --git a/src/cpu/probes/pc_count_tracker_manager.hh b/src/cpu/probes/pc_count_tracker_manager.hh new file mode 100644 index 0000000000..00742ff239 --- /dev/null +++ b/src/cpu/probes/pc_count_tracker_manager.hh @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2022 The Regents of the University of California. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_PROBES_PC_COUNT_TRACKER_MANAGER_HH__ +#define __CPU_PROBES_PC_COUNT_TRACKER_MANAGER_HH__ + +#include +#include + +#include "cpu/base.hh" +#include "params/PcCountTrackerManager.hh" +#include "sim/sim_exit.hh" +#include "debug/PcCountTracker.hh" + +namespace gem5 +{ + + +class PcCountTrackerManager : public SimObject { + public: + PcCountTrackerManager(const PcCountTrackerManagerParams ¶ms); + + /** this function is called when PcCountTrackerProbeListener finds a target + * PC + */ + void checkCount(Addr pc); + + private: + /** a counter that stores all the target PC addresses and the number + * of times the target PC has been executed + */ + std::unordered_map counter; + + /** a set that stores all the PC Count pairs that should raise an + * exit event at + */ + std::unordered_set targetPair; + + /** the current PC Count pair */ + PcCountPair currentPair; + + /** when all the PC Count pairs in the `targetPair` are encountered, + * and the PCCOUNTTRACK_END exit event is raised, this boolean + * variable becomes false and is used to stop the `check_count` + * from functioning. This is default as true. + */ + bool ifListNotEmpty; + + public: + + /** this function returns the corresponding value of count for the + * inputted Program Counter address. If the PC address does not + * exist in the counter, then it returns a -1. 
+ * + * @param pc the targeting Program Counter address + * @return the corresponding value of count for the inputted Program + * Counter address + */ + int + getPcCount(Addr pc) const + { + if (counter.find(pc) != counter.end()) { + return counter.find(pc)->second; + } + return -1; + } + + /** this function returns the current PC Count pair + * + * @return current PC Count pair + */ + PcCountPair + getCurrentPcCountPair() const + { + return currentPair; + } + + /** this function print all targets + * + * @return formatted string that contains all targets + */ + std::string + printAllTargets() const + { + std::string s; + for(auto itr = targetPair.begin(); + itr != targetPair.end(); + ++itr) { + s += itr->to_string(); + s += "\n"; + } + return s; + } +}; + +} + +#endif // __CPU_PROBES_PC_COUNT_TRACKER_MANAGER_HH__ From 41816bf0307c8ef6bc40f032e2e65c22bed26262 Mon Sep 17 00:00:00 2001 From: Zhantong Qiu Date: Fri, 6 Jan 2023 17:13:27 -0800 Subject: [PATCH 209/492] stdlib: Added stdlib LoopPoint classes LoopPoint is a multithreaded workload sampling method that targets PCs and PC execution counts. The main idea for LoopPoint is to base the beginning and end of the simulation sample on the number of times a particular loop (PC) has been executed globally across all threads in a region that is partitioned with a set length of instruction counts. This in some senses generalizes SimPoint which uses the instruction count of a single thread. The link to the paper: https://ieeexplore.ieee.org/document/9773236 The LoopPointCheckpoint is designed to take in a LoopPoint data file and generate the information needed to take checkpoints for LoopPoint regions (warmup regions + simulation region). The LoopPointRestore is designed to take in the LoopPoint data file and generate information needed to restore a checkpoint taken by the LoopPointCheckpoint. The LoopPoint is the parent class for LoopPointCheckpoint and LoopPointRestore.
Change-Id: I595b0ff9d350c7c496639748a9c63ecc61fbaec9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67195 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- src/python/SConscript | 1 + .../components/processors/abstract_core.py | 9 +- .../components/processors/base_cpu_core.py | 20 +- src/python/gem5/utils/looppoint.py | 401 ++++++++++++++++++ 4 files changed, 429 insertions(+), 2 deletions(-) create mode 100644 src/python/gem5/utils/looppoint.py diff --git a/src/python/SConscript b/src/python/SConscript index aeeb8925a3..68b5e1d926 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -240,6 +240,7 @@ PySource('gem5.components.processors', PySource('gem5.components.processors', 'gem5/components/processors/switchable_processor.py') PySource('gem5.utils', 'gem5/utils/simpoint.py') +PySource('gem5.utils', 'gem5/utils/looppoint.py') PySource('gem5.components.processors', 'gem5/components/processors/traffic_generator_core.py') PySource('gem5.components.processors', diff --git a/src/python/gem5/components/processors/abstract_core.py b/src/python/gem5/components/processors/abstract_core.py index 58296bca3b..8259df8a8b 100644 --- a/src/python/gem5/components/processors/abstract_core.py +++ b/src/python/gem5/components/processors/abstract_core.py @@ -29,7 +29,8 @@ from typing import Optional, List from ...isas import ISA -from m5.objects import BaseMMU, Port, SubSystem +from m5.objects import BaseMMU, Port, SubSystem, PcCountTrackerManager +from m5.params import PcCountPair class AbstractCore(SubSystem): @@ -155,3 +156,9 @@ class AbstractCore(SubSystem): instruction stop is setup differently dependent on this. 
""" raise NotImplementedError("This core type does not support MAX_INSTS") + + @abstractmethod + def add_pc_tracker_probe( + self, target_pair: List[PcCountPair], manager: PcCountTrackerManager + ) -> None: + raise NotImplementedError diff --git a/src/python/gem5/components/processors/base_cpu_core.py b/src/python/gem5/components/processors/base_cpu_core.py index 631fd0ad0e..c75c0029cf 100644 --- a/src/python/gem5/components/processors/base_cpu_core.py +++ b/src/python/gem5/components/processors/base_cpu_core.py @@ -33,7 +33,15 @@ from ...runtime import get_runtime_isa from ...utils.override import overrides from ...utils.requires import requires -from m5.objects import BaseMMU, Port, BaseCPU, Process +from m5.objects import ( + BaseMMU, + Port, + BaseCPU, + Process, + PcCountTracker, + PcCountTrackerManager, +) +from m5.params import PcCountPair class BaseCPUCore(AbstractCore): @@ -169,3 +177,13 @@ class BaseCPUCore(AbstractCore): self.core.scheduleInstStopAnyThread(inst) else: self.core.max_insts_any_thread = inst + + @overrides(AbstractCore) + def add_pc_tracker_probe( + self, target_pair: List[PcCountPair], manager: PcCountTrackerManager + ) -> None: + pair_tracker = PcCountTracker() + pair_tracker.targets = target_pair + pair_tracker.core = self.core + pair_tracker.ptmanager = manager + self.core.probeListener = pair_tracker diff --git a/src/python/gem5/utils/looppoint.py b/src/python/gem5/utils/looppoint.py new file mode 100644 index 0000000000..b681e75fee --- /dev/null +++ b/src/python/gem5/utils/looppoint.py @@ -0,0 +1,401 @@ +# Copyright (c) 2022 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +from m5.util import fatal +from m5.params import PcCountPair +from pathlib import Path +from typing import List, Dict +from gem5.components.processors.abstract_processor import AbstractProcessor +from m5.objects import PcCountTrackerManager +import csv +import re +import json + + +class LoopPoint: + """ + This LoopPoint class is used to manage the information needed for LoopPoint + in workload + """ + + def __init__( + self, + targets: List[PcCountPair], + regions: Dict[PcCountPair, int], + json_file: Dict[int, Dict], + ) -> None: + """ + :param targets: a list of PcCountPair that are used to generate exit + event at when the PcCountTrackerManager encounter this PcCountPair in + execution + :param regions: a dictionary used to find the corresponding region id + for the significant PcCountPair. This is mainly used to ensure + checkpoints are taken in the correct PcCountPair or relative counts are + updated at the correct count + :param json_file: all the LoopPoint data including relative counts and + multiplier are stored in this parameter. It can be outputted as a json + file. + """ + + self._manager = PcCountTrackerManager() + self._manager.targets = targets + self._targets = targets + self._regions = regions + self._json_file = json_file + + def setup_processor( + self, + processor: AbstractProcessor, + ) -> None: + """ + This function is used to setup a PC tracker in all the cores and + connect all the tracker to the PC tracker manager to perform + multithread PC tracking + :param processor: the processor used in the simulation configuration + """ + for core in processor.get_cores(): + core.add_pc_tracker_probe(self._targets, self._manager) + + def update_relatives_counts(self) -> None: + """ + This function is used to update the relative count for restore used. + The new relative count will be stored in the _json_file and can be + outputted into a json file by calling the output_json_file function. 
+ """ + current_pair = self._manager.getCurrentPcCountPair() + if current_pair in self._regions: + rid = self._regions[current_pair] + region = self._json_file[rid]["simulation"] + if "warmup" in self._json_file[rid]: + # if this region has a warmup interval, + # then update the relative count for the + # start of the simulation region + start = region["start"]["pc"] + temp = region["start"]["global"] - self._manager.getPcCount( + start + ) + self._json_file[rid]["simulation"]["start"]["relative"] = int( + temp + ) + end = region["end"]["pc"] + temp = region["end"]["global"] - self._manager.getPcCount(end) + self._json_file[rid]["simulation"]["end"]["relative"] = int(temp) + + def output_json_file( + self, input_indent: int = 4, filename: str = "outdir.json" + ) -> Dict[int, Dict]: + """ + This function is used to output the _json_file into a json file + :param input_indent: the indent value of the json file + :param filename: the name of the output file + """ + with open(filename, "w") as file: + json.dump(self._json_file, file, indent=input_indent) + + def get_current_region(self) -> int: + """ + This function returns the region id if the current PC Count pair is + significant(e.x. 
beginning of the checkpoint), otherwise, it returns + a '-1' to indicate the current PC Count pair is not significant + """ + current_pair = self._manager.getCurrentPcCountPair() + if current_pair in self._regions: + return self._regions[current_pair] + return -1 + + def get_current_pair(self) -> PcCountPair: + """ + This function returns the current PC Count pair + """ + return self._manager.getCurrentPcCountPair() + + def get_regions(self) -> Dict[PcCountPair, int]: + """ + This function returns the complete dictionary of _regions + """ + return self._regions + + def get_targets(self) -> List[PcCountPair]: + """ + This function returns the complete list of _targets + """ + return self._targets + + +class LoopPointCheckpoint(LoopPoint): + def __init__(self, looppoint_file: Path, if_csv: bool) -> None: + """ + This class is specifically designed to take in the LoopPoint data file + and generate the information needed to take checkpoints for LoopPoint + regions(warmup region+simulation region) + :param looppoint_file: the director of the LoopPoint data file + :param if_csv: if the file is a csv file, then it is True. 
If the file + is a json file, then it is False + """ + + _json_file = {} + _targets = [] + _region_id = {} + + if if_csv: + self.profile_csv(looppoint_file, _targets, _json_file, _region_id) + else: + self.profile_json(looppoint_file, _targets, _json_file, _region_id) + + super().__init__( + _targets, + _region_id, + _json_file, + ) + + def profile_csv( + self, + looppoint_file_path: Path, + targets: List[PcCountPair], + json_file: Dict[int, Dict], + region_id: Dict[PcCountPair, int], + ) -> None: + """ + This function profiles the csv LoopPoint data file into three variables + to take correct checkpoints for LoopPoint + :param looppoint_file_path: the director of the LoopPoint data file + :param targets: a list of PcCountPair + :param json_file: a dictionary for all the LoopPoint data + :param region_id: a dictionary for all the significant PcCountPair and + its corresponding region id + """ + + # This section is hard-coded to parse the data in the csv file. + # The csv file is assumed to have a constant format. 
+ with open(looppoint_file_path, newline="") as csvfile: + reader = csv.reader(csvfile, delimiter=" ", quotechar="|") + for row in reader: + if len(row) > 1: + if row[0] == "cluster": + # if it is a simulation region + line = row[4].split(",") + start = PcCountPair(int(line[3], 16), int(line[6])) + end = PcCountPair(int(line[7], 16), int(line[10])) + if int(line[2]) in json_file: + # if this region was created in the json_file + json_file[int(line[2])]["simulation"] = { + "start": {"pc": int(line[3], 16)} + } + else: + json_file[int(line[2])] = { + "simulation": { + "start": {"pc": int(line[3], 16)} + } + } + json_file[int(line[2])]["simulation"]["start"][ + "global" + ] = int(line[6]) + json_file[int(line[2])]["simulation"]["end"] = { + "pc": int(line[7], 16) + } + json_file[int(line[2])]["simulation"]["end"][ + "global" + ] = int(line[10]) + json_file[int(line[2])]["multiplier"] = float(line[14]) + targets.append(start) + targets.append(end) + # store all the PC Count pairs from the file to the + # targets list + elif row[0] == "Warmup": + line = row[3].split(",") + start = PcCountPair(int(line[3], 16), int(line[6])) + end = PcCountPair(int(line[7], 16), int(line[10])) + if int(line[0]) in json_file: + json_file[int(line[0])]["warmup"] = { + "start": {"pc": int(line[3], 16)} + } + else: + json_file[int(line[0])] = { + "warmup": {"start": {"pc": int(line[3], 16)}} + } + json_file[int(line[0])]["warmup"]["start"][ + "count" + ] = int(line[6]) + json_file[int(line[0])]["warmup"]["end"] = { + "pc": int(line[7], 16) + } + json_file[int(line[0])]["warmup"]["end"][ + "count" + ] = int(line[10]) + targets.append(start) + targets.append(end) + # store all the PC Count pairs from the file to the + # targets list + + for rid, region in json_file.items(): + # this loop iterates all the regions and find the significant PC + # Count pair for the region + if "warmup" in region: + # if the region has a warmup interval, then the checkpoint + # should be taken at the start of the 
warmup interval + start = PcCountPair( + region["warmup"]["start"]["pc"], + region["warmup"]["start"]["count"], + ) + else: + # if the region does not have a warmup interval, then the + # checkpoint should be taken at the start of the simulation + # region + start = PcCountPair( + region["simulation"]["start"]["pc"], + region["simulation"]["start"]["global"], + ) + region_id[start] = rid + + def profile_json( + self, + looppoint_file_path: Path, + targets: List[PcCountPair], + json_file: Dict[int, Dict], + region_id: Dict[PcCountPair, int], + ) -> None: + """ + This function profiles the json LoopPoint data file into three + variables to take correct checkpoints for LoopPoint + :param looppoint_file_path: the director of the LoopPoint data file + :param targets: a list of PcCountPair + :param json_file: a dictionary for all the LoopPoint data + :param region_id: a dictionary for all the significant PcCountPair and + its corresponding region id + """ + + with open(looppoint_file_path) as file: + json_file = json.load(file) + # load all json information into the json_file variable + for rid, region in json_file.items(): + # iterates all regions + sim_start = PcCountPair( + region["simulation"]["start"]["pc"], + region["simulation"]["start"]["global"], + ) + targets.append(sim_start) + # store all PC Count pairs in the file into targets list + end = PcCountPair( + region["simulation"]["end"]["pc"], + region["simulation"]["end"]["global"], + ) + targets.append(end) + if "warmup" in region: + # if there is a warmup in the region, then the checkpoint + # should be taken at the start of the warmup interval + start = PcCountPair( + region["warmup"]["start"]["pc"], + region["warmup"]["start"]["count"], + ) + targets.append(start) + end = PcCountPair( + region["warmup"]["end"]["pc"], + region["warmup"]["end"]["count"], + ) + targets.append(end) + else: + # if there is not a warmup interval in the region, then the + # checkpoint should be taken at the start of the simulation 
+ # region + start = sim_start + region_id[start] = rid + + +class LoopPointRestore(LoopPoint): + def __init__(self, looppoint_file: Path, checkpoint_path: Path) -> None: + """ + This class is specifically designed to take in the LoopPoint data file and + generator information needed to restore a checkpoint taken by the + LoopPointCheckPoint. + :param looppoint_file: a json file generated by gem5 that has all the + LoopPoint data information + :param checkpoint_path: the director of the checkpoint taken by the gem5 + standard library looppoint_save_checkpoint_generator + + """ + + _json_file = {} + _targets = [] + _region_id = {} + + self.profile_restore( + looppoint_file, checkpoint_path, _targets, _json_file, _region_id + ) + + super().__init__( + _targets, + _region_id, + _json_file, + ) + + def profile_restore( + self, + looppoint_file_path: Path, + checkpoint_dir: Path, + targets: List[PcCountPair], + json_file: Dict[int, Dict], + region_id: Dict[PcCountPair, int], + ) -> None: + """ + This function is used to profile data from the LoopPoint data file to + information needed to restore the LoopPoint checkpoint + :param looppoint_file_path: the director of the LoopPoint data file + :param targets: a list of PcCountPair + :param json_file: a dictionary for all the LoopPoint data + :param region_id: a dictionary for all the significant PcCountPair and + its corresponding region id + """ + regex = re.compile(r"cpt.Region([0-9]+)") + rid = regex.findall(checkpoint_dir.as_posix())[0] + # finds out the region id from the directory name + with open(looppoint_file_path) as file: + json_file = json.load(file) + if rid not in json_file: + # if the region id does not exist in the LoopPoint data file + # raise a fatal message + fatal(f"{rid} is not a valid region\n") + region = json_file[rid] + if "warmup" in region: + if "relative" not in region["simulation"]["start"]: + # if there are not relative counts for the PC Count pair + # then it means there is not enough 
information to restore + # this checkpoint + fatal(f"region {rid} doesn't have relative count info\n") + start = PcCountPair( + region["simulation"]["start"]["pc"], + region["simulation"]["start"]["relative"], + ) + region_id[start] = rid + targets.append(start) + if "relative" not in region["simulation"]["end"]: + fatal(f"region {rid} doesn't have relative count info\n") + end = PcCountPair( + region["simulation"]["end"]["pc"], + region["simulation"]["end"]["relative"], + ) + region_id[end] = rid + targets.append(end) From 82c587bd8834fb0700414cb6d81e76f46d588d8f Mon Sep 17 00:00:00 2001 From: Zhantong Qiu Date: Fri, 6 Jan 2023 17:25:26 -0800 Subject: [PATCH 210/492] stdlib: Allow se_binary_workload to setup LoopPoints Added a set_se_looppoint_workload function to take in information for workload and a stdlib LoopPoint object that stores all the information the workload needed to run the LoopPoint sampling method. Added a get_looppoint function to return the stdlib LoopPoint object. Change-Id: I7afc5c4c743256f7df97345f331b6f72b7a5fd07 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67196 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- .../components/boards/se_binary_workload.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py index 31931106c9..404a78458f 100644 --- a/src/python/gem5/components/boards/se_binary_workload.py +++ b/src/python/gem5/components/boards/se_binary_workload.py @@ -25,6 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from .abstract_board import AbstractBoard + from ...resources.resource import ( FileResource, AbstractResource, @@ -34,6 +35,8 @@ from ...resources.resource import ( SimpointDirectoryResource, ) +from gem5.utils.looppoint import LoopPoint + from m5.objects import SEWorkload, Process from typing import Optional, List, Union @@ -170,3 +173,47 @@ class SEBinaryWorkload: if getattr(self, "_simpoint_resource", None): return self._simpoint_resource raise Exception("This board does not have a simpoint set.") + + def set_se_looppoint_workload( + self, + binary: AbstractResource, + arguments: List[str] = [], + looppoint: Optional[Union[AbstractResource, LoopPoint]] = None, + checkpoint: Optional[Union[Path, AbstractResource]] = None, + ) -> None: + """Set up the system to run a LoopPoint workload. + + **Limitations** + * Dynamically linked executables are partially supported when the host + ISA and the simulated ISA are the same. + + :param binary: The resource encapsulating the binary to be run. + :param arguments: The input arguments for the binary + :param looppoint: The LoopPoint object that contain all the information + gather from the LoopPoint files and a LoopPointManager that will raise + exit events for LoopPoints + """ + + if isinstance(looppoint, AbstractResource): + self._looppoint_object = LoopPoint(looppoint) + else: + assert isinstance(looppoint, LoopPoint) + self._looppoint_object = looppoint + + self._looppoint_object.setup_processor(self.get_processor()) + + # Call set_se_binary_workload after LoopPoint setup is complete + self.set_se_binary_workload( + binary=binary, + arguments=arguments, + checkpoint=checkpoint, + ) + + def get_looppoint(self) -> LoopPoint: + """ + Returns the LoopPoint object set. If no LoopPoint object has been set + an exception is thrown. 
+ """ + if getattr(self, "_looppoint_object", None): + return self._looppoint_object + raise Exception("This board does not have a looppoint set.") From afbca3b6e7a7a4eb919d9b919e292de3090efd60 Mon Sep 17 00:00:00 2001 From: Zhantong Qiu Date: Fri, 6 Jan 2023 17:28:00 -0800 Subject: [PATCH 211/492] stdlib: Added LoopPoint checkpoint specific generator Added looppoint_save_checkpoint_generator to take checkpoints for the LoopPoint methodology. Users can decide whether to update the relative counts stored in the LoopPoint module and whether to exit once all the target PC-count pairs have been encountered. Change-Id: Id1cf1516f4fa838e20a67530e94b361e42ca09f3 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67197 Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce Tested-by: kokoro --- .../gem5/simulate/exit_event_generators.py | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/python/gem5/simulate/exit_event_generators.py b/src/python/gem5/simulate/exit_event_generators.py index 738e1281d9..82eba17543 100644 --- a/src/python/gem5/simulate/exit_event_generators.py +++ b/src/python/gem5/simulate/exit_event_generators.py @@ -29,6 +29,7 @@ import m5.stats from ..components.processors.abstract_processor import AbstractProcessor from ..components.processors.switchable_processor import SwitchableProcessor from ..resources.resource import SimpointResource +from gem5.utils.looppoint import LoopPoint from m5.util import warn from pathlib import Path @@ -167,3 +168,46 @@ def simpoints_save_checkpoint_generator( yield False else: yield True + + +def looppoint_save_checkpoint_generator( + checkpoint_dir: Path, + looppoint: LoopPoint, + update_relatives: bool = True, + exit_when_empty: bool = True, +): + """ + A generator for taking a checkpoint for LoopPoint. It will save the + checkpoints in the checkpoint_dir path with the Region id. + (i.e. "cpt.Region10) It only takes a checkpoint if the current PC Count + pair is a significant PC Count Pair. 
This is determined in the LoopPoint + module. The simulation loop continues after exiting this generator. + :param checkpoint_dir: where to save the checkpoints + :param loopoint: the looppoint object used in the configuration script + :param update_relative: if the generator should update the relative count + information in the output json file, then it should be True. It is default + as True. + :param exit_when_empty: if the generator should exit the simulation loop if + all PC paris have been discovered, then it should be True. It is default as + True. + """ + if exit_when_empty: + total_pairs = len(looppoint.get_targets()) + else: + total_pairs = -1 + # it will never equal to 0 if exit_when_empty is false + + while total_pairs != 0: + region = looppoint.get_current_region() + # if it is a significant PC Count pair, then the get_current_region() + # will return an integer greater than 0. By significant PC Count pair, + # it means the PC Count pair that indicates where to take the + # checkpoint at. This is determined in the LoopPoint module. + if region != -1: + if update_relatives: + looppoint.update_relatives_counts() + m5.checkpoint((checkpoint_dir / f"cpt.Region{region}").as_posix()) + total_pairs -= 1 + yield False + + yield True From de5044cfe088449bba6d74d2f51540174880dac1 Mon Sep 17 00:00:00 2001 From: "Bobby R. 
Bruce" Date: Mon, 30 Jan 2023 01:09:45 +0000 Subject: [PATCH 212/492] stdlib: Change the default Looppoint JSON output to m5out Change-Id: I6ac9aa55dfd4a0250c487ae743a1f4ea0b1bc154 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67491 Reviewed-by: Bobby Bruce Tested-by: kokoro Maintainer: Bobby Bruce --- src/python/gem5/utils/looppoint.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/python/gem5/utils/looppoint.py b/src/python/gem5/utils/looppoint.py index b681e75fee..d1851a8478 100644 --- a/src/python/gem5/utils/looppoint.py +++ b/src/python/gem5/utils/looppoint.py @@ -34,6 +34,8 @@ from m5.objects import PcCountTrackerManager import csv import re import json +import m5 +import os class LoopPoint: @@ -106,14 +108,16 @@ class LoopPoint: self._json_file[rid]["simulation"]["end"]["relative"] = int(temp) def output_json_file( - self, input_indent: int = 4, filename: str = "outdir.json" + self, + input_indent: int = 4, + filepath: str = os.path.join(m5.options.outdir, "outdir.json"), ) -> Dict[int, Dict]: """ This function is used to output the _json_file into a json file :param input_indent: the indent value of the json file - :param filename: the name of the output file + :param filepath: the path of the output file """ - with open(filename, "w") as file: + with open(filepath, "w") as file: json.dump(self._json_file, file, indent=input_indent) def get_current_region(self) -> int: From 82aec1c19be55ac64864fb08099dba075db90a83 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Mon, 30 Jan 2023 01:16:27 +0000 Subject: [PATCH 213/492] stdlib: Update LoopPointRestore to take singular region_id This change has been introduced as we only restore to one checkpoint at a time. The change updates LoopPointRestore to take a singular region_id and use it to generate the LoopPoint from the LoopPoint JSON file. 
Change-Id: I0e88d5ba03b164bdd5da098397f44e16af591134 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67492 Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce Tested-by: kokoro --- src/python/gem5/utils/looppoint.py | 111 ++++++++++++----------------- 1 file changed, 45 insertions(+), 66 deletions(-) diff --git a/src/python/gem5/utils/looppoint.py b/src/python/gem5/utils/looppoint.py index d1851a8478..8e01e3030f 100644 --- a/src/python/gem5/utils/looppoint.py +++ b/src/python/gem5/utils/looppoint.py @@ -28,7 +28,7 @@ from m5.util import fatal from m5.params import PcCountPair from pathlib import Path -from typing import List, Dict +from typing import List, Dict, Tuple from gem5.components.processors.abstract_processor import AbstractProcessor from m5.objects import PcCountTrackerManager import csv @@ -330,76 +330,55 @@ class LoopPointCheckpoint(LoopPoint): class LoopPointRestore(LoopPoint): - def __init__(self, looppoint_file: Path, checkpoint_path: Path) -> None: + def __init__(self, looppoint_file: Path, region_id: int) -> None: """ - This class is specifically designed to take in the LoopPoint data file and - generator information needed to restore a checkpoint taken by the + This class is specifically designed to take in the LoopPoint data file + and generator information needed to restore a checkpoint taken by the LoopPointCheckPoint. :param looppoint_file: a json file generated by gem5 that has all the LoopPoint data information - :param checkpoint_path: the director of the checkpoint taken by the gem5 - standard library looppoint_save_checkpoint_generator - + :param region_id: The region ID we will be restoring to. 
""" - _json_file = {} - _targets = [] - _region_id = {} - - self.profile_restore( - looppoint_file, checkpoint_path, _targets, _json_file, _region_id - ) - - super().__init__( - _targets, - _region_id, - _json_file, - ) - - def profile_restore( - self, - looppoint_file_path: Path, - checkpoint_dir: Path, - targets: List[PcCountPair], - json_file: Dict[int, Dict], - region_id: Dict[PcCountPair, int], - ) -> None: - """ - This function is used to profile data from the LoopPoint data file to - information needed to restore the LoopPoint checkpoint - :param looppoint_file_path: the director of the LoopPoint data file - :param targets: a list of PcCountPair - :param json_file: a dictionary for all the LoopPoint data - :param region_id: a dictionary for all the significant PcCountPair and - its corresponding region id - """ - regex = re.compile(r"cpt.Region([0-9]+)") - rid = regex.findall(checkpoint_dir.as_posix())[0] - # finds out the region id from the directory name - with open(looppoint_file_path) as file: + with open(looppoint_file) as file: json_file = json.load(file) - if rid not in json_file: - # if the region id does not exist in the LoopPoint data file - # raise a fatal message - fatal(f"{rid} is not a valid region\n") - region = json_file[rid] - if "warmup" in region: - if "relative" not in region["simulation"]["start"]: - # if there are not relative counts for the PC Count pair - # then it means there is not enough information to restore - # this checkpoint - fatal(f"region {rid} doesn't have relative count info\n") - start = PcCountPair( - region["simulation"]["start"]["pc"], - region["simulation"]["start"]["relative"], - ) - region_id[start] = rid - targets.append(start) - if "relative" not in region["simulation"]["end"]: - fatal(f"region {rid} doesn't have relative count info\n") - end = PcCountPair( - region["simulation"]["end"]["pc"], - region["simulation"]["end"]["relative"], + + targets, regions = self.get_region( + json_file=json_file, 
region_id=region_id + ) + + super().__init__(targets=targets, regions=regions, json_file=json_file) + + def get_region( + self, json_file: Dict[int, Dict], region_id: int + ) -> Tuple[List[PcCountPair], Dict[PcCountPair, int]]: + to_return_region = {} + to_return_targets = [] + + if region_id not in json_file: + # if the region id does not exist in the LoopPoint data + # file raise a fatal message + fatal(f"{region_id} is not a valid region\n") + region = json_file[region_id] + if "warmup" in region: + if "relative" not in region["simulation"]["start"]: + # if there are not relative counts for the PC Count + # pair then it means there is not enough information to + # restore this checkpoint + fatal(f"region {region_id} doesn't have relative count info\n") + start = PcCountPair( + region["simulation"]["start"]["pc"], + region["simulation"]["start"]["relative"], ) - region_id[end] = rid - targets.append(end) + to_return_region[start] = region_id + to_return_targets.append(start) + if "relative" not in region["simulation"]["end"]: + fatal(f"region {region_id} doesn't have relative count info\n") + end = PcCountPair( + region["simulation"]["end"]["pc"], + region["simulation"]["end"]["relative"], + ) + to_return_region[end] = region_id + to_return_targets.append(end) + + return to_return_targets, to_return_region From f59d860e5104a4f073c7b108b576b88afec1e5cb Mon Sep 17 00:00:00 2001 From: "Bobby R. 
Bruce" Date: Mon, 30 Jan 2023 01:29:45 +0000 Subject: [PATCH 214/492] stdlib: Add looppoint example scripts Change-Id: If9827af9ba7958af492a6c09cf83e4f6dac9a2eb Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67493 Tested-by: kokoro Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce --- .../create-looppoint-checkpoints.py | 155 ++++++++++++++++++ .../restore-looppoint-checkpoint.py | 150 +++++++++++++++++ 2 files changed, 305 insertions(+) create mode 100644 configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py create mode 100644 configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py diff --git a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py new file mode 100644 index 0000000000..1d8525fe77 --- /dev/null +++ b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py @@ -0,0 +1,155 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This configuration script shows an example of how to take checkpoints for +LoopPoint using the gem5 stdlib. To take checkpoints for LoopPoint simulation +regions, there must be a LoopPoint data file generated by Pin or the gem5 +simulator. With the information in the LoopPoint data file, the stdlib +modules will take checkpoints at the beginning of the simulation regions +(warmup region included if it exists) and record all restore needed information +into a JSON file. The JSON file is needed for later restoring, so please call +`looppoint.output_json_file()` at the end of the simulation. + +This script builds a simple board with the gem5 stdlib with no cache and a +simple memory structure to take checkpoints. Some of the components, such as +cache hierarchy, can be changed when restoring checkpoints. 
+ +Usage +----- +``` +scons build/X86/gem5.opt +./build/X86/gem5.opt \ + configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py +``` +""" + +from gem5.simulate.exit_event import ExitEvent +from gem5.simulate.simulator import Simulator +from gem5.utils.requires import requires +from gem5.components.cachehierarchies.classic.no_cache import NoCache +from gem5.components.boards.simple_board import SimpleBoard +from gem5.components.memory.single_channel import SingleChannelDDR3_1600 +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.resources.resource import obtain_resource +from pathlib import Path +from gem5.simulate.exit_event_generators import ( + looppoint_save_checkpoint_generator, +) +from gem5.utils.looppoint import LoopPointCheckpoint +import argparse + +requires(isa_required=ISA.X86) + +parser = argparse.ArgumentParser( + description="An example looppoint workload file path" +) + +# The lone argument is a file path to a directory to store the checkpoints. + +parser.add_argument( + "--checkpoint-path", + type=str, + required=False, + default="looppoint_checkpoints_folder", + help="The directory to store the checkpoints.", +) + +args = parser.parse_args() + +# When taking a checkpoint, the cache state is not saved, so the cache +# hierarchy can be changed completely when restoring from a checkpoint. +# By using NoCache() to take checkpoints, it can slightly improve the +# performance when running in atomic mode, and it will not put any restrictions +# on what people can do with the checkpoints. +cache_hierarchy = NoCache() + + +# Using simple memory to take checkpoints might slightly improve the +# performance in atomic mode. The memory structure can be changed when +# restoring from a checkpoint, but the size of the memory must be equal to or +# greater than that used when creating the checkpoint. 
+memory = SingleChannelDDR3_1600(size="2GB") + +processor = SimpleProcessor( + cpu_type=CPUTypes.ATOMIC, + isa=ISA.X86, + # LoopPoint can work with multicore workloads + num_cores=9, +) + +looppoint = LoopPointCheckpoint( + # Pass in the LoopPoint data file + looppoint_file=Path( + obtain_resource( + "x86-matrix-multiply-omp-100-8-global-pinpoints" + ).get_local_path() + ), + # True if the LoopPoint data file is a csv generated by Pin. + # False if it is a JSON file generated by the gem5 simulator. + if_csv=True, +) + +board = SimpleBoard( + clk_freq="3GHz", + processor=processor, + memory=memory, + cache_hierarchy=cache_hierarchy, +) + +board.set_se_looppoint_workload( + binary=obtain_resource("x86-matrix-multiply-omp"), + arguments=[100, 8], + # Pass LoopPoint module into the board + looppoint=looppoint, +) + +dir = Path(args.checkpoint_path) +dir.mkdir(exist_ok=True) + +simulator = Simulator( + board=board, + on_exit_event={ + ExitEvent.SIMPOINT_BEGIN: looppoint_save_checkpoint_generator( + checkpoint_dir=dir, + looppoint=looppoint, + # True if the relative PC count pairs should be updated during the + # simulation. Default as True. + update_relatives=True, + # True if the simulation loop should exit after all the PC count + # pairs in the LoopPoint data file have been encountered. Default + # as True. + exit_when_empty=True, + ) + }, +) + +simulator.run() + +# Output the JSON file +looppoint.output_json_file() diff --git a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py new file mode 100644 index 0000000000..28645259d0 --- /dev/null +++ b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py @@ -0,0 +1,150 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This configuration script shows an example of how to restore a checkpoint that +was taken for a LoopPoint simulation region in the example-restore.py. +All the LoopPoint information should be passed in through the JSON file +generated by the gem5 simulator when all the checkpoints were taken. + +This script builds a more complex board than the board used for taking +checkpoints. 
+ +Usage +----- +``` +./build/X86/gem5.opt \ + configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py +``` +""" +import argparse + +from gem5.simulate.exit_event import ExitEvent +from gem5.simulate.simulator import Simulator +from gem5.utils.requires import requires +from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import ( + PrivateL1PrivateL2CacheHierarchy, +) +from gem5.components.boards.simple_board import SimpleBoard +from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.resources.resource import obtain_resource +from pathlib import Path +from gem5.utils.looppoint import LoopPointRestore +from m5.stats import reset, dump + +requires(isa_required=ISA.X86) + +parser = argparse.ArgumentParser(description="An restore checkpoint script.") + +parser.add_argument( + "--checkpoint-region", + type=str, + required=False, + choices=( + "1", + "2", + "3", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + ), + default="1", + help="The checkpoint region to restore from.", +) +args = parser.parse_args() + +# The cache hierarchy can be different from the cache hierarchy used in taking +# the checkpoints +cache_hierarchy = PrivateL1PrivateL2CacheHierarchy( + l1d_size="32kB", + l1i_size="32kB", + l2_size="256kB", +) + +# The memory structure can be different from the memory structure used in +# taking the checkpoints, but the size of the memory must be equal or larger. +memory = DualChannelDDR4_2400(size="2GB") + +processor = SimpleProcessor( + cpu_type=CPUTypes.TIMING, + isa=ISA.X86, + # The number of cores must be equal or greater than that used when taking + # the checkpoint. 
+ num_cores=9, +) + +board = SimpleBoard( + clk_freq="3GHz", + processor=processor, + memory=memory, + cache_hierarchy=cache_hierarchy, +) + +looppoint = LoopPointRestore( + looppoint_file=Path( + obtain_resource( + "x86-matrix-multiply-omp-100-8-looppoint" + ).get_local_path() + ), + region_id=args.checkpoint_region, +) + +board.set_se_looppoint_workload( + binary=obtain_resource("x86-matrix-multiply-omp"), looppoint=looppoint +) + +# This generator will dump the stats and exit the simulation loop when the +# simulation region reaches its end. In the case there is a warmup interval, +# the simulation stats are reset after the warmup is complete. +def reset_and_dump(): + if len(looppoint.get_targets()) > 1: + print("Warmup region ended. Resetting stats.") + reset() + yield False + print("Region ended. Dumping stats.") + dump() + yield True + + +simulator = Simulator( + board=board, + checkpoint_path=obtain_resource( + f"x86-matrix-multiply-omp-100-8-looppoint-checkpoint-region-{args.checkpoint_region}" + ).get_local_path(), + on_exit_event={ExitEvent.SIMPOINT_BEGIN: reset_and_dump()}, +) + +simulator.run() From aae34302814df270748d542b7fb795f03dd66fad Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Sun, 5 Feb 2023 20:20:30 +0000 Subject: [PATCH 215/492] stdlib: Refactor Looppoint This change refactors the Looppoint files. While functionally equivalent, these classes have been moved and altered to be easier to handle going forward. The following changes have been made: - New classes have been added to represent the data structure of the Looppoint JSON. This simplifies the parsing of JSON files and makes it easier to handle Looppoint data structures. Ultimately this is hidden from the user via the new 'gem5.resources.Looppoint' class which will be the front-facing class for Looppoint interactions. - The `LooppointCheckpoint` class has been replaced with `LooppointCsvLoader`. This new class takes in a CSV pinpoints file to load necessary looppoint data.
- The `LoopPointRestore` class has been replaced by `LooppointJsonLoader`. - All Looppoint classes have been moved to `gem5.resources`. This will make it easier when we add Looppoints as specific gem5 resources. Change-Id: I11dd1fe8f76658db220320584270d57cb37a3c62 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67611 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- .../create-looppoint-checkpoints.py | 9 +- .../restore-looppoint-checkpoint.py | 4 +- src/python/SConscript | 2 +- .../components/boards/se_binary_workload.py | 21 +- src/python/gem5/resources/looppoint.py | 544 ++++++++++++++++++ .../gem5/simulate/exit_event_generators.py | 6 +- src/python/gem5/utils/looppoint.py | 384 ------------- 7 files changed, 564 insertions(+), 406 deletions(-) create mode 100644 src/python/gem5/resources/looppoint.py delete mode 100644 src/python/gem5/utils/looppoint.py diff --git a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py index 1d8525fe77..f967aa56e4 100644 --- a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py +++ b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py @@ -61,7 +61,7 @@ from pathlib import Path from gem5.simulate.exit_event_generators import ( looppoint_save_checkpoint_generator, ) -from gem5.utils.looppoint import LoopPointCheckpoint +from gem5.resources.looppoint import LooppointCsvLoader import argparse requires(isa_required=ISA.X86) @@ -103,16 +103,13 @@ processor = SimpleProcessor( num_cores=9, ) -looppoint = LoopPointCheckpoint( +looppoint = LooppointCsvLoader( # Pass in the LoopPoint data file looppoint_file=Path( obtain_resource( "x86-matrix-multiply-omp-100-8-global-pinpoints" ).get_local_path() - ), - # True if the LoopPoint data file is a csv generated by Pin. - # False if it is a JSON file generated by the gem5 simulator. 
- if_csv=True, + ) ) board = SimpleBoard( diff --git a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py index 28645259d0..c54fdabca1 100644 --- a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py +++ b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py @@ -55,7 +55,7 @@ from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA from gem5.resources.resource import obtain_resource from pathlib import Path -from gem5.utils.looppoint import LoopPointRestore +from gem5.utils.resource import LooppointJsonLoader from m5.stats import reset, dump requires(isa_required=ISA.X86) @@ -113,7 +113,7 @@ board = SimpleBoard( cache_hierarchy=cache_hierarchy, ) -looppoint = LoopPointRestore( +looppoint = LooppointJsonLoader( looppoint_file=Path( obtain_resource( "x86-matrix-multiply-omp-100-8-looppoint" diff --git a/src/python/SConscript b/src/python/SConscript index 68b5e1d926..f401c03468 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -240,7 +240,6 @@ PySource('gem5.components.processors', PySource('gem5.components.processors', 'gem5/components/processors/switchable_processor.py') PySource('gem5.utils', 'gem5/utils/simpoint.py') -PySource('gem5.utils', 'gem5/utils/looppoint.py') PySource('gem5.components.processors', 'gem5/components/processors/traffic_generator_core.py') PySource('gem5.components.processors', @@ -263,6 +262,7 @@ PySource('gem5.resources', 'gem5/resources/downloader.py') PySource('gem5.resources', 'gem5/resources/md5_utils.py') PySource('gem5.resources', 'gem5/resources/resource.py') PySource('gem5.resources', 'gem5/resources/workload.py') +PySource('gem5.resources', 'gem5/resources/looppoint.py') PySource('gem5.utils', 'gem5/utils/__init__.py') PySource('gem5.utils', 'gem5/utils/filelock.py') PySource('gem5.utils', 'gem5/utils/override.py') diff --git 
a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py index 404a78458f..dc5425754e 100644 --- a/src/python/gem5/components/boards/se_binary_workload.py +++ b/src/python/gem5/components/boards/se_binary_workload.py @@ -35,7 +35,7 @@ from ...resources.resource import ( SimpointDirectoryResource, ) -from gem5.utils.looppoint import LoopPoint +from gem5.resources.looppoint import Looppoint from m5.objects import SEWorkload, Process @@ -177,9 +177,10 @@ class SEBinaryWorkload: def set_se_looppoint_workload( self, binary: AbstractResource, + looppoint: Looppoint, arguments: List[str] = [], - looppoint: Optional[Union[AbstractResource, LoopPoint]] = None, checkpoint: Optional[Union[Path, AbstractResource]] = None, + region_id: Optional[Union[int, str]] = None, ) -> None: """Set up the system to run a LoopPoint workload. @@ -188,18 +189,18 @@ class SEBinaryWorkload: ISA and the simulated ISA are the same. :param binary: The resource encapsulating the binary to be run. - :param arguments: The input arguments for the binary :param looppoint: The LoopPoint object that contain all the information gather from the LoopPoint files and a LoopPointManager that will raise exit events for LoopPoints + :param arguments: The input arguments for the binary + :param region_id: If set, will only load the Looppoint region + corresponding to that ID. 
""" - if isinstance(looppoint, AbstractResource): - self._looppoint_object = LoopPoint(looppoint) - else: - assert isinstance(looppoint, LoopPoint) - self._looppoint_object = looppoint - + assert isinstance(looppoint, Looppoint) + self._looppoint_object = looppoint + if region_id: + self._looppoint_object.set_target_region_id(region_id=region_id) self._looppoint_object.setup_processor(self.get_processor()) # Call set_se_binary_workload after LoopPoint setup is complete @@ -209,7 +210,7 @@ class SEBinaryWorkload: checkpoint=checkpoint, ) - def get_looppoint(self) -> LoopPoint: + def get_looppoint(self) -> Looppoint: """ Returns the LoopPoint object set. If no LoopPoint object has been set an exception is thrown. diff --git a/src/python/gem5/resources/looppoint.py b/src/python/gem5/resources/looppoint.py new file mode 100644 index 0000000000..684faef37d --- /dev/null +++ b/src/python/gem5/resources/looppoint.py @@ -0,0 +1,544 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.params import PcCountPair +from m5.objects import PcCountTrackerManager +import m5 + +import os +import csv +import json +from pathlib import Path +from typing import List, Optional, Dict, Union + + +class LooppointRegionPC: + """A data structure for storing the Looppoint region's PC information. + + **Note**: This is not intended to be a user-facing class. The classes + `LooppointJsonLoader` and `LooppointCSVLoader` can be used to load + and restore Simpoint data. + """ + + def __init__(self, pc: int, globl: int, relative: Optional[int] = None): + """ + :param pc: The Program Counter value of this region. + :param globl: The global value of this region. + :param relative: The relative program counter value. Optional. 
+ """ + self._pc = pc + self._global = globl + self._relative = relative + + def get_pc(self) -> int: + """Returns the Program counter value.""" + return self._pc + + def get_global(self) -> int: + """Returns the global value.""" + return self._global + + def get_relative(self) -> Optional[int]: + """If specified, returns the relative Program counter value, otherwise + returns None.""" + return self._relative + + def get_pc_count_pair(self) -> PcCountPair: + """Returns the PcCountPair for this Region PC value.""" + return PcCountPair(self.get_pc(), self.get_global()) + + def update_relative_count(self, manager: PcCountTrackerManager) -> None: + """Updates the relative count.""" + self._relative = int( + self.get_global() - manager.getPcCount(self.get_pc()) + ) + + def to_json(self) -> Dict[str, int]: + """Returns this class in a JSON structure which can then be serialized + and later be restored from.""" + to_return = { + "pc": self.get_pc(), + "global": self.get_global(), + } + if self._relative: + to_return["relative"] = self.get_relative() + + return to_return + + +class LooppointRegionWarmup: + """A data structure for storing a Looppoint region's warmup data. + + **Note**: This is not intended to be a user-facing class. The classes + `LooppointJsonLoader` and `LooppointCSVLoader` can be used to load + and restore Simpoint data. + """ + + def __init__(self, start: PcCountPair, end: PcCountPair): + """ + :param start: The starting PcCountPair. + :param end: The ending PcCountPair. 
+ """ + self._start = start + self._end = end + + def get_start(self) -> PcCountPair: + """Returns the PcCountPair for the start of the region warmup.""" + return self._start + + def get_end(self) -> PcCountPair: + """Returns the PcCountPair for the end of the region warmup.""" + return self._end + + def get_pc_count_pairs(self) -> List[PcCountPair]: + """Returns the start and end PC count pairs.""" + return [self.get_start(), self.get_end()] + + def to_json(self) -> Dict[str, Dict[str, int]]: + """Returns this class in a JSON structure which can then be + serialized.""" + return { + "start": { + "pc": self.get_start().pc, + "count": self.get_start().count, + }, + "end": { + "pc": self.get_end().pc, + "count": self.get_end().count, + }, + } + + +class LooppointSimulation: + """A data structure to store the simulation region start and end region. + + **Note**: This is not intended to be a user-facing class. The classes + `LooppointJsonLoader` and `LooppointCSVLoader` can be used to load + and restore Simpoint data. + """ + + def __init__(self, start: LooppointRegionPC, end: LooppointRegionPC): + """ + :param start: The starting LooppointRegionPC. + :param end: The ending LoopppointRegionPC. 
+ """ + self._start = start + self._end = end + + def get_start(self) -> LooppointRegionPC: + """Returns the starting LooppointRegionPC data structure.""" + return self._start + + def get_end(self) -> LooppointRegionPC: + """Returns the ending LooppointRegionPC data structure.""" + return self._end + + def get_pc_count_pairs(self) -> List[PcCountPair]: + """Returns the PC count pairs for the start and end + LoopointRegionPCs.""" + return [ + self.get_start().get_pc_count_pair(), + self.get_end().get_pc_count_pair(), + ] + + def update_relatives_counts( + self, manager: PcCountTrackerManager, include_start: bool = False + ) -> None: + """Updates the relative counts for this simulation region.""" + if include_start: + # if this region has a warmup interval, + # then update the relative count for the + # start of the simulation region + self.get_start().update_relative_count(manager=manager) + + self.get_end().update_relative_count(manager=manager) + + def to_json(self) -> Dict: + """Returns this class in a JSON structure which can then be serialized + and later be restored from.""" + return { + "start": self.get_start().to_json(), + "end": self.get_end().to_json(), + } + + +class LooppointRegion: + """A data structure to store Looppoint region information. + + **Note**: This is not intended to be a user-facing class. The classes + `LooppointJsonLoader` and `LooppointCSVLoader` can be used to load + and restore Simpoint data. + """ + + def __init__( + self, + simulation: LooppointSimulation, + multiplier: float, + warmup: Optional[LooppointRegionWarmup] = None, + ): + """ + :param simulation: The simulation information for this Looppoint + region. + :param multiplier: The multiplier for this Looppoint region. + :param warmup: The warmup information for this Looppoint region. + Optional. 
+ """ + self._simulation = simulation + self._multiplier = multiplier + self._warmup = warmup + + def get_simulation(self) -> LooppointSimulation: + """Returns the simulation region information.""" + return self._simulation + + def get_multiplier(self) -> float: + """Returns the multiplier.""" + return self._multiplier + + def get_warmup(self) -> Optional[LooppointRegionWarmup]: + """If set, returns the warmup region information. Otherwise None.""" + return self._warmup + + def get_pc_count_pairs(self) -> List[PcCountPair]: + """Returns the PC count pairs for this Looppoint region.""" + pc_count_pairs = self.get_simulation().get_pc_count_pairs() + if self.get_warmup(): + pc_count_pairs.extend(self.get_warmup().get_pc_count_pairs()) + return pc_count_pairs + + def update_relatives_counts(self, manager: PcCountTrackerManager) -> None: + """Updates the relative counds of this Looppoint region.""" + self.get_simulation().update_relatives_counts( + manager=manager, include_start=bool(self.get_warmup()) + ) + + def get_start(self) -> PcCountPair: + """Returns the correct starting PcCountPair for this Looppoint + region.""" + if self.get_warmup(): + return self.get_warmup().get_start() + return self.get_simulation().get_start().get_pc_count_pair() + + def to_json(self) -> Dict: + """Returns this class in a JSON structure which can then be serialized + and later be restored from.""" + to_return = { + "simulation": self.get_simulation().to_json(), + "multiplier": self.get_multiplier(), + } + if self.get_warmup(): + to_return["warmup"] = self.get_warmup().to_json() + return to_return + + +class Looppoint: + """Stores all the Looppoint information for a gem5 workload.""" + + def __init__(self, regions: Dict[Union[str, int], LooppointRegion]): + """ + :param regions: A dictionary mapping the region_ids with the + LooppointRegions. 
+ """ + self._regions = regions + self._manager = PcCountTrackerManager() + self._manager.targets = self.get_targets() + + def set_target_region_id(self, region_id: Union[str, int]) -> None: + """There are use-cases where we want to obtain a looppoint data + structure containing a single target region via its ID. This function + will remove all irrelevant regions.""" + + if region_id not in self._regions: + raise Exception(f"Region ID '{region_id}' cannot be found.") + + to_remove = [rid for rid in self._regions if rid is not region_id] + for rid in to_remove: + del self._regions[rid] + + self._manager.targets = self.get_targets() + + def get_manager(self) -> PcCountTrackerManager: + """Returns the PcCountTrackerManager for this Looppoint data + structure.""" + return self._manager + + def get_regions(self) -> Dict[Union[int, str], LooppointRegion]: + """Returns the regions for this Looppoint data structure.""" + return self._regions + + def setup_processor( + self, + processor: "AbstractProcessor", + ) -> None: + """ + A function is used to setup a PC tracker in all the cores and + connect all the tracker to the PC tracker manager to perform + multithread PC tracking. + + :param processor: The processor used in the simulation configuration. + """ + for core in processor.get_cores(): + core.add_pc_tracker_probe(self.get_targets(), self.get_manager()) + + def update_relatives_counts(self) -> None: + """ + Updates the relative count for restore usage. The new relative count + will be stored in relevant data structures. + """ + current_pair = self.get_current_pair() + region_start_map = self.get_region_start_id_map() + if current_pair in region_start_map: + region_id = region_start_map[current_pair] + self.get_regions()[region_id].update_relatives_counts( + manager=self.get_manager() + ) + + def get_current_region(self) -> Optional[Union[str, int]]: + """Returns the region id if the current PC Count pair if significant + (e.g. 
beginning of the checkpoint), otherwise, it returns None to + indicate the current PC Count pair is not significant. + """ + current_pair = self.get_current_pair() + region_start_map = self.get_region_start_id_map() + if current_pair in region_start_map: + return region_start_map[current_pair] + return None + + def get_current_pair(self) -> PcCountPair: + """This function returns the current PC Count pair.""" + return self.get_manager().getCurrentPcCountPair() + + def get_region_start_id_map(self) -> Dict[PcCountPair, Union[int, str]]: + """Returns the starting PcCountPairs mapped to the corresponding region + IDs. This is a helper function for quick mapping of PcCountPairs to + region IDs.""" + + regions = {} + for rid in self.get_regions(): + regions[self.get_regions()[rid].get_start()] = rid + + return regions + + def get_targets(self) -> List[PcCountPair]: + """Returns the complete list of target PcCountPairs. That is, the + PcCountPairs each region starts with as well as the relevant warmup + intervals.""" + targets = [] + for rid in self.get_regions(): + targets.extend(self.get_regions()[rid].get_pc_count_pairs()) + + return targets + + def to_json(self) -> Dict[Union[int, str], Dict]: + """Returns this data-structure as a dictionary for serialization via + the `output_json_file` function.""" + to_return = {} + for region_id in self.get_regions(): + to_return[region_id] = self.get_regions()[region_id].to_json() + return to_return + + def output_json_file( + self, + input_indent: int = 4, + filepath: str = os.path.join(m5.options.outdir, "looppoint.json"), + ) -> Dict[int, Dict]: + """ + This function is used to output the _json_file into a json file + + :param input_indent: the indent value of the json file + :param filepath: the path of the output json file + """ + with open(filepath, "w") as file: + json.dump(self.to_json(), file, indent=input_indent) + + +class LooppointCsvLoader(Looppoint): + """This class will create a Looppoint data structure from data 
extracted + from a Looppoint pinpoints file.""" + + def __init__( + self, + pinpoints_file: Union[Path, str], + region_id: Optional[Union[str, int]] = None, + ): + """ + :params pinpoints_file: The pinpoints file in which the data is to be + expected. + :params region_id: If set, will only load the specified region data. + Otherwise, all region info is loaded. Is used when restoring to a + particular region. + """ + + regions = {} + warmups = {} + + _path = ( + pinpoints_file + if isinstance(pinpoints_file, Path) + else Path(pinpoints_file) + ) + + # This section is hard-coded to parse the data in the csv file. + # The csv file is assumed to have a constant format. + with open(_path, newline="") as csvfile: + reader = csv.reader(csvfile, delimiter=" ", quotechar="|") + for row in reader: + if len(row) > 1: + if row[0] == "cluster": + # if it is a simulation region + line = row[4].split(",") + + rid = int(line[2]) + + region_start = LooppointRegionPC( + pc=int(line[3], 16), + globl=int(line[6]), + # From the CSV's I've observed, the start relative + # value is never set, while the end is always set. + # Given limited information, I can only determine + # this is a rule of how the CSV is setup. + relative=None, + ) + + region_end = LooppointRegionPC( + pc=int(line[7], 16), + globl=int(line[10]), + relative=int(line[11]), + ) + + simulation = LooppointSimulation( + start=region_start, end=region_end + ) + + multiplier = float(line[14]) + + region = LooppointRegion( + simulation=simulation, multiplier=multiplier + ) + + regions[rid] = region + + elif row[0] == "Warmup": + line = row[3].split(",") + rid = int(line[0]) + start = PcCountPair(int(line[3], 16), int(line[6])) + end = PcCountPair(int(line[7], 16), int(line[10])) + + warmup = LooppointRegionWarmup(start=start, end=end) + warmups[rid] = warmup + + for rid in warmups: + if rid not in regions: + raise Exception( + "Warmup region ID '{rid}' does not have a " + "corresponding region." 
+ ) + regions[rid]._warmup = warmups[rid] + + super().__init__(regions=regions) + + if region_id: + self.set_target_region_id(region_id=region_id) + + +class LooppointJsonLoader(Looppoint): + """This class will create a generate a Looppoint data structure from data + extracted from a Looppoint json file.""" + + def __init__( + self, + looppoint_file: Union[str, Path], + region_id: Optional[Union[str, int]] = None, + ) -> None: + """ + :param looppoint_file: a json file generated by gem5 that has all the + LoopPoint data information + :params region_id: If set, will only load the specified region data. + Otherwise, all region info is loaded. Is used when restoring to a + particular region. + """ + + _path = ( + looppoint_file + if isinstance(looppoint_file, Path) + else Path(looppoint_file) + ) + + regions = {} + with open(_path) as file: + json_contents = json.load(file) + for rid in json_contents: + + start_pc = int(json_contents[rid]["simulation"]["start"]["pc"]) + start_globl = int( + json_contents[rid]["simulation"]["start"]["global"] + ) + start_relative = ( + int(json_contents[rid]["simulation"]["start"]["relative"]) + if "relative" in json_contents[rid]["simulation"]["start"] + else None + ) + start = LooppointRegionPC( + pc=start_pc, + globl=start_globl, + relative=start_relative, + ) + + end_pc = int(json_contents[rid]["simulation"]["end"]["pc"]) + end_globl = int( + json_contents[rid]["simulation"]["end"]["global"] + ) + end_relative = ( + int(json_contents[rid]["simulation"]["end"]["relative"]) + if "relative" in json_contents[rid]["simulation"]["end"] + else None + ) + end = LooppointRegionPC( + pc=end_pc, + globl=end_globl, + relative=end_relative, + ) + simulation = LooppointSimulation(start=start, end=end) + multiplier = float(json_contents[rid]["multiplier"]) + warmup = None + if "warmup" in json_contents[rid]: + start = PcCountPair( + json_contents[rid]["warmup"]["start"]["pc"], + json_contents[rid]["warmup"]["start"]["count"], + ) + end = 
PcCountPair( + json_contents[rid]["warmup"]["end"]["pc"], + json_contents[rid]["warmup"]["end"]["count"], + ) + warmup = LooppointRegionWarmup(start=start, end=end) + + regions[rid] = LooppointRegion( + simulation=simulation, multiplier=multiplier, warmup=warmup + ) + + super().__init__(regions=regions) + if region_id: + self.set_target_region_id(region_id=region_id) diff --git a/src/python/gem5/simulate/exit_event_generators.py b/src/python/gem5/simulate/exit_event_generators.py index 82eba17543..37998d3a9b 100644 --- a/src/python/gem5/simulate/exit_event_generators.py +++ b/src/python/gem5/simulate/exit_event_generators.py @@ -29,7 +29,7 @@ import m5.stats from ..components.processors.abstract_processor import AbstractProcessor from ..components.processors.switchable_processor import SwitchableProcessor from ..resources.resource import SimpointResource -from gem5.utils.looppoint import LoopPoint +from gem5.resources.looppoint import Looppoint from m5.util import warn from pathlib import Path @@ -172,7 +172,7 @@ def simpoints_save_checkpoint_generator( def looppoint_save_checkpoint_generator( checkpoint_dir: Path, - looppoint: LoopPoint, + looppoint: Looppoint, update_relatives: bool = True, exit_when_empty: bool = True, ): @@ -203,7 +203,7 @@ def looppoint_save_checkpoint_generator( # will return an integer greater than 0. By significant PC Count pair, # it means the PC Count pair that indicates where to take the # checkpoint at. This is determined in the LoopPoint module. - if region != -1: + if region: if update_relatives: looppoint.update_relatives_counts() m5.checkpoint((checkpoint_dir / f"cpt.Region{region}").as_posix()) diff --git a/src/python/gem5/utils/looppoint.py b/src/python/gem5/utils/looppoint.py deleted file mode 100644 index 8e01e3030f..0000000000 --- a/src/python/gem5/utils/looppoint.py +++ /dev/null @@ -1,384 +0,0 @@ -# Copyright (c) 2022 The Regents of the University of California -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - -from m5.util import fatal -from m5.params import PcCountPair -from pathlib import Path -from typing import List, Dict, Tuple -from gem5.components.processors.abstract_processor import AbstractProcessor -from m5.objects import PcCountTrackerManager -import csv -import re -import json -import m5 -import os - - -class LoopPoint: - """ - This LoopPoint class is used to manage the information needed for LoopPoint - in workload - """ - - def __init__( - self, - targets: List[PcCountPair], - regions: Dict[PcCountPair, int], - json_file: Dict[int, Dict], - ) -> None: - """ - :param targets: a list of PcCountPair that are used to generate exit - event at when the PcCountTrackerManager encounter this PcCountPair in - execution - :param regions: a dictionary used to find the corresponding region id - for the significant PcCountPair. This is mainly used to ensure - checkpoints are taken in the correct PcCountPair or relative counts are - updated at the correct count - :param json_file: all the LoopPoint data including relative counts and - multiplier are stored in this parameter. It can be outputted as a json - file. - """ - - self._manager = PcCountTrackerManager() - self._manager.targets = targets - self._targets = targets - self._regions = regions - self._json_file = json_file - - def setup_processor( - self, - processor: AbstractProcessor, - ) -> None: - """ - This function is used to setup a PC tracker in all the cores and - connect all the tracker to the PC tracker manager to perform - multithread PC tracking - :param processor: the processor used in the simulation configuration - """ - for core in processor.get_cores(): - core.add_pc_tracker_probe(self._targets, self._manager) - - def update_relatives_counts(self) -> None: - """ - This function is used to update the relative count for restore used. - The new relative count will be stored in the _json_file and can be - outputted into a json file by calling the output_json_file function. 
- """ - current_pair = self._manager.getCurrentPcCountPair() - if current_pair in self._regions: - rid = self._regions[current_pair] - region = self._json_file[rid]["simulation"] - if "warmup" in self._json_file[rid]: - # if this region has a warmup interval, - # then update the relative count for the - # start of the simulation region - start = region["start"]["pc"] - temp = region["start"]["global"] - self._manager.getPcCount( - start - ) - self._json_file[rid]["simulation"]["start"]["relative"] = int( - temp - ) - end = region["end"]["pc"] - temp = region["end"]["global"] - self._manager.getPcCount(end) - self._json_file[rid]["simulation"]["end"]["relative"] = int(temp) - - def output_json_file( - self, - input_indent: int = 4, - filepath: str = os.path.join(m5.options.outdir, "outdir.json"), - ) -> Dict[int, Dict]: - """ - This function is used to output the _json_file into a json file - :param input_indent: the indent value of the json file - :param filepath: the path of the output file - """ - with open(filepath, "w") as file: - json.dump(self._json_file, file, indent=input_indent) - - def get_current_region(self) -> int: - """ - This function returns the region id if the current PC Count pair is - significant(e.x. 
beginning of the checkpoint), otherwise, it returns - a '-1' to indicate the current PC Count pair is not significant - """ - current_pair = self._manager.getCurrentPcCountPair() - if current_pair in self._regions: - return self._regions[current_pair] - return -1 - - def get_current_pair(self) -> PcCountPair: - """ - This function returns the current PC Count pair - """ - return self._manager.getCurrentPcCountPair() - - def get_regions(self) -> Dict[PcCountPair, int]: - """ - This function returns the complete dictionary of _regions - """ - return self._regions - - def get_targets(self) -> List[PcCountPair]: - """ - This function returns the complete list of _targets - """ - return self._targets - - -class LoopPointCheckpoint(LoopPoint): - def __init__(self, looppoint_file: Path, if_csv: bool) -> None: - """ - This class is specifically designed to take in the LoopPoint data file - and generate the information needed to take checkpoints for LoopPoint - regions(warmup region+simulation region) - :param looppoint_file: the director of the LoopPoint data file - :param if_csv: if the file is a csv file, then it is True. 
If the file - is a json file, then it is False - """ - - _json_file = {} - _targets = [] - _region_id = {} - - if if_csv: - self.profile_csv(looppoint_file, _targets, _json_file, _region_id) - else: - self.profile_json(looppoint_file, _targets, _json_file, _region_id) - - super().__init__( - _targets, - _region_id, - _json_file, - ) - - def profile_csv( - self, - looppoint_file_path: Path, - targets: List[PcCountPair], - json_file: Dict[int, Dict], - region_id: Dict[PcCountPair, int], - ) -> None: - """ - This function profiles the csv LoopPoint data file into three variables - to take correct checkpoints for LoopPoint - :param looppoint_file_path: the director of the LoopPoint data file - :param targets: a list of PcCountPair - :param json_file: a dictionary for all the LoopPoint data - :param region_id: a dictionary for all the significant PcCountPair and - its corresponding region id - """ - - # This section is hard-coded to parse the data in the csv file. - # The csv file is assumed to have a constant format. 
- with open(looppoint_file_path, newline="") as csvfile: - reader = csv.reader(csvfile, delimiter=" ", quotechar="|") - for row in reader: - if len(row) > 1: - if row[0] == "cluster": - # if it is a simulation region - line = row[4].split(",") - start = PcCountPair(int(line[3], 16), int(line[6])) - end = PcCountPair(int(line[7], 16), int(line[10])) - if int(line[2]) in json_file: - # if this region was created in the json_file - json_file[int(line[2])]["simulation"] = { - "start": {"pc": int(line[3], 16)} - } - else: - json_file[int(line[2])] = { - "simulation": { - "start": {"pc": int(line[3], 16)} - } - } - json_file[int(line[2])]["simulation"]["start"][ - "global" - ] = int(line[6]) - json_file[int(line[2])]["simulation"]["end"] = { - "pc": int(line[7], 16) - } - json_file[int(line[2])]["simulation"]["end"][ - "global" - ] = int(line[10]) - json_file[int(line[2])]["multiplier"] = float(line[14]) - targets.append(start) - targets.append(end) - # store all the PC Count pairs from the file to the - # targets list - elif row[0] == "Warmup": - line = row[3].split(",") - start = PcCountPair(int(line[3], 16), int(line[6])) - end = PcCountPair(int(line[7], 16), int(line[10])) - if int(line[0]) in json_file: - json_file[int(line[0])]["warmup"] = { - "start": {"pc": int(line[3], 16)} - } - else: - json_file[int(line[0])] = { - "warmup": {"start": {"pc": int(line[3], 16)}} - } - json_file[int(line[0])]["warmup"]["start"][ - "count" - ] = int(line[6]) - json_file[int(line[0])]["warmup"]["end"] = { - "pc": int(line[7], 16) - } - json_file[int(line[0])]["warmup"]["end"][ - "count" - ] = int(line[10]) - targets.append(start) - targets.append(end) - # store all the PC Count pairs from the file to the - # targets list - - for rid, region in json_file.items(): - # this loop iterates all the regions and find the significant PC - # Count pair for the region - if "warmup" in region: - # if the region has a warmup interval, then the checkpoint - # should be taken at the start of the 
warmup interval - start = PcCountPair( - region["warmup"]["start"]["pc"], - region["warmup"]["start"]["count"], - ) - else: - # if the region does not have a warmup interval, then the - # checkpoint should be taken at the start of the simulation - # region - start = PcCountPair( - region["simulation"]["start"]["pc"], - region["simulation"]["start"]["global"], - ) - region_id[start] = rid - - def profile_json( - self, - looppoint_file_path: Path, - targets: List[PcCountPair], - json_file: Dict[int, Dict], - region_id: Dict[PcCountPair, int], - ) -> None: - """ - This function profiles the json LoopPoint data file into three - variables to take correct checkpoints for LoopPoint - :param looppoint_file_path: the director of the LoopPoint data file - :param targets: a list of PcCountPair - :param json_file: a dictionary for all the LoopPoint data - :param region_id: a dictionary for all the significant PcCountPair and - its corresponding region id - """ - - with open(looppoint_file_path) as file: - json_file = json.load(file) - # load all json information into the json_file variable - for rid, region in json_file.items(): - # iterates all regions - sim_start = PcCountPair( - region["simulation"]["start"]["pc"], - region["simulation"]["start"]["global"], - ) - targets.append(sim_start) - # store all PC Count pairs in the file into targets list - end = PcCountPair( - region["simulation"]["end"]["pc"], - region["simulation"]["end"]["global"], - ) - targets.append(end) - if "warmup" in region: - # if there is a warmup in the region, then the checkpoint - # should be taken at the start of the warmup interval - start = PcCountPair( - region["warmup"]["start"]["pc"], - region["warmup"]["start"]["count"], - ) - targets.append(start) - end = PcCountPair( - region["warmup"]["end"]["pc"], - region["warmup"]["end"]["count"], - ) - targets.append(end) - else: - # if there is not a warmup interval in the region, then the - # checkpoint should be taken at the start of the simulation 
- # region - start = sim_start - region_id[start] = rid - - -class LoopPointRestore(LoopPoint): - def __init__(self, looppoint_file: Path, region_id: int) -> None: - """ - This class is specifically designed to take in the LoopPoint data file - and generator information needed to restore a checkpoint taken by the - LoopPointCheckPoint. - :param looppoint_file: a json file generated by gem5 that has all the - LoopPoint data information - :param region_id: The region ID we will be restoring to. - """ - - with open(looppoint_file) as file: - json_file = json.load(file) - - targets, regions = self.get_region( - json_file=json_file, region_id=region_id - ) - - super().__init__(targets=targets, regions=regions, json_file=json_file) - - def get_region( - self, json_file: Dict[int, Dict], region_id: int - ) -> Tuple[List[PcCountPair], Dict[PcCountPair, int]]: - to_return_region = {} - to_return_targets = [] - - if region_id not in json_file: - # if the region id does not exist in the LoopPoint data - # file raise a fatal message - fatal(f"{region_id} is not a valid region\n") - region = json_file[region_id] - if "warmup" in region: - if "relative" not in region["simulation"]["start"]: - # if there are not relative counts for the PC Count - # pair then it means there is not enough information to - # restore this checkpoint - fatal(f"region {region_id} doesn't have relative count info\n") - start = PcCountPair( - region["simulation"]["start"]["pc"], - region["simulation"]["start"]["relative"], - ) - to_return_region[start] = region_id - to_return_targets.append(start) - if "relative" not in region["simulation"]["end"]: - fatal(f"region {region_id} doesn't have relative count info\n") - end = PcCountPair( - region["simulation"]["end"]["pc"], - region["simulation"]["end"]["relative"], - ) - to_return_region[end] = region_id - to_return_targets.append(end) - - return to_return_targets, to_return_region From 52194c87b0ca80c480e5b5c8b26868204d601f19 Mon Sep 17 00:00:00 2001 From: 
"Bobby R. Bruce" Date: Wed, 8 Feb 2023 20:06:16 +0000 Subject: [PATCH 216/492] tests: Add pyunit tests for Looppoint Change-Id: Ie275e50bbcf5fb3d38ee98d7ada27c3afe4ec1b0 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67757 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- tests/pyunit/stdlib/pyunit_looppoint.py | 600 ++++++++++++++++++ .../matrix.1_92.global.pinpoints_reduced.csv | 28 + tests/pyunit/stdlib/refs/output.json | 40 ++ 3 files changed, 668 insertions(+) create mode 100644 tests/pyunit/stdlib/pyunit_looppoint.py create mode 100644 tests/pyunit/stdlib/refs/matrix.1_92.global.pinpoints_reduced.csv create mode 100644 tests/pyunit/stdlib/refs/output.json diff --git a/tests/pyunit/stdlib/pyunit_looppoint.py b/tests/pyunit/stdlib/pyunit_looppoint.py new file mode 100644 index 0000000000..0cb708e8ac --- /dev/null +++ b/tests/pyunit/stdlib/pyunit_looppoint.py @@ -0,0 +1,600 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import unittest + +from m5.params import PcCountPair + +from gem5.resources.looppoint import ( + Looppoint, + LooppointRegionPC, + LooppointRegionWarmup, + LooppointSimulation, + LooppointRegion, + LooppointCsvLoader, + LooppointJsonLoader, +) + +import os + + +class LooppointRegionPCTestSuite(unittest.TestCase): + """Tests the resources.looppoint.LooppointRegionPC class.""" + + def test_construction_with_relative(self) -> None: + region_pc = LooppointRegionPC(pc=444, globl=65, relative=454) + + self.assertEquals(444, region_pc.get_pc()) + self.assertEquals(65, region_pc.get_global()) + self.assertEquals(454, region_pc.get_relative()) + + def test_construction_without_relative(self) -> None: + region_pc = LooppointRegionPC(pc=43454, globl=653434) + + self.assertEquals(43454, region_pc.get_pc()) + self.assertEquals(653434, region_pc.get_global()) + self.assertIsNone(region_pc.get_relative()) + + def test_get_pc_count_pair(self) -> None: + region_pc = LooppointRegionPC(pc=1, globl=2) + expected = PcCountPair(1, 2) + self.assertEquals(expected, region_pc.get_pc_count_pair()) + + def update_relative_count(self) -> None: + pass # Not really sure what to do here... 
+ + def test_to_json_with_relative(self) -> None: + region_pc = LooppointRegionPC(pc=100, globl=200, relative=300) + json_contents = region_pc.to_json() + + self.assertEquals(3, len(json_contents)) + self.assertTrue("pc" in json_contents) + self.assertEquals(100, json_contents["pc"]) + self.assertTrue("global" in json_contents) + self.assertEquals(200, json_contents["global"]) + self.assertTrue("relative" in json_contents) + self.assertEquals(300, json_contents["relative"]) + + def test_to_json_without_relative(self) -> None: + region_pc = LooppointRegionPC(pc=1111, globl=2222) + json_contents = region_pc.to_json() + + self.assertEquals(2, len(json_contents)) + self.assertTrue("pc" in json_contents) + self.assertEquals(1111, json_contents["pc"]) + self.assertTrue("global" in json_contents) + self.assertEquals(2222, json_contents["global"]) + self.assertFalse("relative" in json_contents) + + +class LooppointRegionWarmupTestSuite(unittest.TestCase): + """Tests the resources.looppoint.LooppointWarmup class.""" + + def test_construction(self) -> None: + region_warmup = LooppointRegionWarmup( + start=PcCountPair(123, 456), end=PcCountPair(789, 1011) + ) + + self.assertEquals(PcCountPair(123, 456), region_warmup.get_start()) + self.assertEquals(PcCountPair(789, 1011), region_warmup.get_end()) + + def test_get_pc_count_pairs(self) -> None: + region_warmup = LooppointRegionWarmup( + start=PcCountPair(1, 1), end=PcCountPair(2, 2) + ) + + output = region_warmup.get_pc_count_pairs() + self.assertEquals(2, len(output)) + self.assertEquals(PcCountPair(1, 1), output[0]) + self.assertEquals(PcCountPair(2, 2), output[1]) + + def test_to_json(self) -> None: + region_warmup = LooppointRegionWarmup( + start=PcCountPair(100, 200), end=PcCountPair(101, 202) + ) + + expected = { + "start": {"pc": 100, "count": 200}, + "end": {"pc": 101, "count": 202}, + } + + self.assertDictEqual(expected, region_warmup.to_json()) + + +class LooppointSimulationTestSuite(unittest.TestCase): + """Tests 
the resources.looppoint.LooppointSimulation class.""" + + def test_construction_with(self) -> None: + sim = LooppointSimulation( + start=LooppointRegionPC(pc=444, globl=65, relative=454), + end=LooppointRegionPC(pc=555, globl=699), + ) + + sim_start = sim.get_start() + + self.assertEquals(444, sim_start.get_pc()) + self.assertEquals(65, sim_start.get_global()) + self.assertEquals(454, sim_start.get_relative()) + + sim_end = sim.get_end() + + self.assertEquals(555, sim_end.get_pc()) + self.assertEquals(699, sim_end.get_global()) + self.assertIsNone(sim_end.get_relative()) + + def test_get_pc_count_pairs(self) -> None: + sim = LooppointSimulation( + start=LooppointRegionPC(pc=56, globl=45, relative=34), + end=LooppointRegionPC(pc=23, globl=12), + ) + + sim_pc_count_pairs = sim.get_pc_count_pairs() + self.assertEquals(2, len(sim_pc_count_pairs)) + self.assertEquals(PcCountPair(56, 45), sim_pc_count_pairs[0]) + self.assertEquals(PcCountPair(23, 12), sim_pc_count_pairs[1]) + + def test_get_json(self) -> None: + sim = LooppointSimulation( + start=LooppointRegionPC(pc=1, globl=2, relative=3), + end=LooppointRegionPC(pc=4, globl=5), + ) + expected = { + "start": { + "pc": 1, + "global": 2, + "relative": 3, + }, + "end": { + "pc": 4, + "global": 5, + }, + } + self.assertDictEqual(expected, sim.to_json()) + + +class LooppointRegionTestSuite(unittest.TestCase): + """Tests the resources.looppoint.LooppointRegion class.""" + + def test_construction_with_warmup(self): + region = LooppointRegion( + simulation=LooppointSimulation( + start=LooppointRegionPC(pc=1, globl=2, relative=3), + end=LooppointRegionPC(pc=6, globl=7), + ), + multiplier=5.6, + warmup=LooppointRegionWarmup( + start=PcCountPair(100, 200), end=PcCountPair(101, 202) + ), + ) + + self.assertTrue( + isinstance(region.get_simulation(), LooppointSimulation) + ) + self.assertEquals(5.6, region.get_multiplier()) + self.assertIsNotNone(region.get_warmup()) + self.assertTrue(isinstance(region.get_warmup(), 
LooppointRegionWarmup)) + + def test_construction_without_warmup(self): + region = LooppointRegion( + simulation=LooppointSimulation( + start=LooppointRegionPC(pc=56, globl=2345, relative=344), + end=LooppointRegionPC(pc=645, globl=457), + ), + multiplier=5444.4, + ) + + self.assertTrue( + isinstance(region.get_simulation(), LooppointSimulation) + ) + self.assertEquals(5444.4, region.get_multiplier()) + self.assertIsNone(region.get_warmup()) + + def test_get_pc_count_pairs_with_warmup(self): + region = LooppointRegion( + simulation=LooppointSimulation( + start=LooppointRegionPC(pc=1, globl=2, relative=3), + end=LooppointRegionPC(pc=6, globl=7), + ), + multiplier=5.6, + warmup=LooppointRegionWarmup( + start=PcCountPair(100, 200), end=PcCountPair(101, 202) + ), + ) + pc_count_pairs = region.get_pc_count_pairs() + + self.assertEquals(4, len(pc_count_pairs)) + self.assertEquals(PcCountPair(1, 2), pc_count_pairs[0]) + self.assertEquals(PcCountPair(6, 7), pc_count_pairs[1]) + self.assertEquals(PcCountPair(100, 200), pc_count_pairs[2]) + self.assertEquals(PcCountPair(101, 202), pc_count_pairs[3]) + + def test_get_pc_count_pairs_without_warmup(self): + region = LooppointRegion( + simulation=LooppointSimulation( + start=LooppointRegionPC(pc=56, globl=2345, relative=344), + end=LooppointRegionPC(pc=645, globl=457), + ), + multiplier=5444.4, + ) + + pc_count_pairs = region.get_pc_count_pairs() + + self.assertEquals(2, len(pc_count_pairs)) + self.assertEquals(PcCountPair(56, 2345), pc_count_pairs[0]) + self.assertEquals(PcCountPair(645, 457), pc_count_pairs[1]) + + +class LooppointTestSuite(unittest.TestCase): + """Tests the resources.looppoint.Looppoint class.""" + + def test_construction(self): + region1 = LooppointRegion( + simulation=LooppointSimulation( + start=LooppointRegionPC(pc=56, globl=2345, relative=344), + end=LooppointRegionPC(pc=645, globl=457), + ), + multiplier=5444.4, + ) + region2 = LooppointRegion( + simulation=LooppointSimulation( + 
start=LooppointRegionPC(pc=67, globl=254, relative=3345), + end=LooppointRegionPC(pc=64554, globl=7454), + ), + multiplier=5.6, + warmup=LooppointRegionWarmup( + start=PcCountPair(100, 200), end=PcCountPair(101, 202) + ), + ) + + looppoint = Looppoint( + regions={ + 1: region1, + 3: region2, + } + ) + + self.assertEquals(2, len(looppoint.get_regions())) + self.assertTrue(1 in looppoint.get_regions()) + self.assertEquals(region1, looppoint.get_regions()[1]) + self.assertTrue(3 in looppoint.get_regions()) + self.assertEquals(region2, looppoint.get_regions()[3]) + + def test_get_targets(self): + region1 = LooppointRegion( + simulation=LooppointSimulation( + start=LooppointRegionPC(pc=56, globl=2345, relative=344), + end=LooppointRegionPC(pc=645, globl=457), + ), + multiplier=5444.4, + ) + region2 = LooppointRegion( + simulation=LooppointSimulation( + start=LooppointRegionPC(pc=67, globl=254, relative=3345), + end=LooppointRegionPC(pc=64554, globl=7454), + ), + multiplier=5.6, + warmup=LooppointRegionWarmup( + start=PcCountPair(100, 200), end=PcCountPair(101, 202) + ), + ) + + looppoint = Looppoint( + regions={ + 1: region1, + 3: region2, + } + ) + + targets = looppoint.get_targets() + self.assertEquals(6, len(targets)) + self.assertEquals(PcCountPair(56, 2345), targets[0]) + self.assertEquals(PcCountPair(645, 457), targets[1]) + self.assertEquals(PcCountPair(67, 254), targets[2]) + self.assertEquals(PcCountPair(64554, 7454), targets[3]) + self.assertEquals(PcCountPair(100, 200), targets[4]) + self.assertEquals(PcCountPair(101, 202), targets[5]) + + def test_get_region_start_id_map(self): + + region1 = LooppointRegion( + simulation=LooppointSimulation( + start=LooppointRegionPC(pc=56, globl=2345, relative=344), + end=LooppointRegionPC(pc=645, globl=457), + ), + multiplier=5444.4, + ) + region2 = LooppointRegion( + simulation=LooppointSimulation( + start=LooppointRegionPC(pc=67, globl=254, relative=3345), + end=LooppointRegionPC(pc=64554, globl=7454), + ), + 
multiplier=5.6, + warmup=LooppointRegionWarmup( + start=PcCountPair(100, 200), end=PcCountPair(101, 202) + ), + ) + + looppoint = Looppoint( + regions={ + 1: region1, + 3: region2, + } + ) + + region_start_id_map = looppoint.get_region_start_id_map() + + self.assertEquals(2, len(region_start_id_map)) + + # The start of region1. + self.assertTrue(PcCountPair(56, 2345) in region_start_id_map) + self.assertEquals(1, region_start_id_map[PcCountPair(56, 2345)]) + + # The start of region2. Since this has a warmup, it's the warmup. + self.assertTrue(PcCountPair(100, 200) in region_start_id_map) + self.assertEquals(3, region_start_id_map[PcCountPair(100, 200)]) + + def test_to_json(self) -> None: + region1 = LooppointRegion( + simulation=LooppointSimulation( + start=LooppointRegionPC(pc=56, globl=2345, relative=344), + end=LooppointRegionPC(pc=645, globl=457), + ), + multiplier=5444.4, + ) + region2 = LooppointRegion( + simulation=LooppointSimulation( + start=LooppointRegionPC(pc=67, globl=254, relative=3345), + end=LooppointRegionPC(pc=64554, globl=7454), + ), + multiplier=5.6, + warmup=LooppointRegionWarmup( + start=PcCountPair(100, 200), end=PcCountPair(101, 202) + ), + ) + + looppoint = Looppoint( + regions={ + 1: region1, + 3: region2, + } + ) + + expected = { + 1: { + "simulation": { + "start": { + "pc": 56, + "global": 2345, + "relative": 344, + }, + "end": { + "pc": 645, + "global": 457, + }, + }, + "multiplier": 5444.4, + }, + 3: { + "simulation": { + "start": { + "pc": 67, + "global": 254, + "relative": 3345, + }, + "end": { + "pc": 64554, + "global": 7454, + }, + }, + "multiplier": 5.6, + "warmup": { + "start": { + "pc": 100, + "count": 200, + }, + "end": { + "pc": 101, + "count": 202, + }, + }, + }, + } + + # Need to increase the max for if there is an error. 
+ self.maxDiff = 2056 + self.assertDictEqual(expected, looppoint.to_json()) + + +class LooppointCSVLoaderTestSuite(unittest.TestCase): + """Tests the resources.looppoint.LooppointCsvLoader class.""" + + def test_load_pinpoints_matrix(self): + looppoint = LooppointCsvLoader( + pinpoints_file=os.path.join( + os.path.realpath(os.path.dirname(__file__)), + "refs", + "matrix.1_92.global.pinpoints_reduced.csv", + ) + ) + + regions = looppoint.get_regions() + self.assertEquals(3, len(regions)) + + region1 = regions[1] + self.assertEquals(4.0, region1.get_multiplier()) + + region1start = region1.get_simulation().get_start() + self.assertEquals(0x4069D0, region1start.get_pc()) + self.assertEquals(211076617, region1start.get_global()) + self.assertIsNone(region1start.get_relative()) + + region1end = region1.get_simulation().get_end() + self.assertEquals(0x4069D0, region1end.get_pc()) + self.assertEquals(219060252, region1end.get_global()) + self.assertIsNotNone(region1end.get_relative()) + self.assertEquals(1060676, region1end.get_relative()) + + self.assertIsNone(region1.get_warmup()) + + region2 = regions[2] + self.assertEquals(5.001, region2.get_multiplier()) + + region2start = region2.get_simulation().get_start() + self.assertEquals(0x4069D0, region2start.get_pc()) + self.assertEquals(407294228, region2start.get_global()) + self.assertIsNone(region2start.get_relative()) + + region2end = region2.get_simulation().get_end() + self.assertEquals(0x4069D0, region2end.get_pc()) + self.assertEquals(415282447, region2end.get_global()) + self.assertIsNotNone(region2end.get_relative()) + self.assertEquals(1035231, region2end.get_relative()) + + region2warmup = region2.get_warmup() + self.assertIsNotNone(region2warmup) + self.assertEquals( + PcCountPair(0x406880, 48111518), region2warmup.get_start() + ) + self.assertEquals( + PcCountPair(0x4069D0, 407294228), region2warmup.get_end() + ) + + region3 = regions[3] + self.assertEquals(4.0, region3.get_multiplier()) + + region3start = 
region3.get_simulation().get_start() + self.assertEquals(0x4069D0, region3start.get_pc()) + self.assertEquals(187978221, region3start.get_global()) + self.assertIsNone(region3start.get_relative()) + + region3end = region3.get_simulation().get_end() + self.assertEquals(0x406880, region3end.get_pc()) + self.assertEquals(23520614, region3end.get_global()) + self.assertIsNotNone(region3end.get_relative()) + self.assertEquals(144352, region3end.get_relative()) + + self.assertIsNone(region3.get_warmup()) + + def test_load_pinpoints_matrix_region_1(self): + looppoint = LooppointCsvLoader( + pinpoints_file=os.path.join( + os.path.realpath(os.path.dirname(__file__)), + "refs", + "matrix.1_92.global.pinpoints_reduced.csv", + ), + region_id=1, + ) + + regions = looppoint.get_regions() + self.assertEquals(1, len(regions)) + + self.assertTrue(1 in regions) + region1 = regions[1] + self.assertEquals(4.0, region1.get_multiplier()) + + region1start = region1.get_simulation().get_start() + self.assertEquals(0x4069D0, region1start.get_pc()) + self.assertEquals(211076617, region1start.get_global()) + self.assertIsNone(region1start.get_relative()) + + region1end = region1.get_simulation().get_end() + self.assertEquals(0x4069D0, region1end.get_pc()) + self.assertEquals(219060252, region1end.get_global()) + self.assertIsNotNone(region1end.get_relative()) + self.assertEquals(1060676, region1end.get_relative()) + + self.assertIsNone(region1.get_warmup()) + + +class LooppointJsonLoaderTestSuite(unittest.TestCase): + """Tests the resources.looppoint.LooppointJsonLoader class.""" + + def test_load_pinpoints_matrix_region_1(self): + looppoint = LooppointJsonLoader( + looppoint_file=os.path.join( + os.path.realpath(os.path.dirname(__file__)), + "refs", + "output.json", + ), + region_id="1", + ) + + self.assertEquals(1, len(looppoint.get_regions())) + self.assertTrue("1" in looppoint.get_regions()) + region = looppoint.get_regions()["1"] + + self.assertEquals(4.0, region.get_multiplier()) + + 
region_start = region.get_simulation().get_start() + self.assertEquals(4221392, region_start.get_pc()) + self.assertEquals(211076617, region_start.get_global()) + self.assertIsNotNone(region_start.get_relative()) + self.assertEquals(15326617, region_start.get_relative()) + + region_end = region.get_simulation().get_end() + self.assertEquals(4221392, region_end.get_pc()) + self.assertEquals(219060252, region_end.get_global()) + self.assertIsNotNone(region_end.get_relative()) + self.assertEquals(23310252, region_end.get_relative()) + + region_warmup = region.get_warmup() + self.assertIsNotNone(region_warmup) + + self.assertEquals( + PcCountPair(4221056, 23520614), region_warmup.get_start() + ) + self.assertEquals( + PcCountPair(4221392, 211076617), region_warmup.get_end() + ) + + def test_load_pinpoints_matrix_region_2(self): + looppoint = LooppointJsonLoader( + looppoint_file=os.path.join( + os.path.realpath(os.path.dirname(__file__)), + "refs", + "output.json", + ), + region_id="2", + ) + + self.assertEquals(1, len(looppoint.get_regions())) + self.assertTrue("2" in looppoint.get_regions()) + region = looppoint.get_regions()["2"] + + self.assertEquals(5.001, region.get_multiplier()) + + region_start = region.get_simulation().get_start() + self.assertEquals(4221392, region_start.get_pc()) + self.assertEquals(407294228, region_start.get_global()) + self.assertIsNone(region_start.get_relative()) + + region_end = region.get_simulation().get_end() + self.assertEquals(4221392, region_end.get_pc()) + self.assertEquals(415282447, region_end.get_global()) + self.assertIsNone(region_end.get_relative()) + + region_warmup = region.get_warmup() + self.assertIsNone(region_warmup) diff --git a/tests/pyunit/stdlib/refs/matrix.1_92.global.pinpoints_reduced.csv b/tests/pyunit/stdlib/refs/matrix.1_92.global.pinpoints_reduced.csv new file mode 100644 index 0000000000..56e3fc57bf --- /dev/null +++ b/tests/pyunit/stdlib/refs/matrix.1_92.global.pinpoints_reduced.csv @@ -0,0 +1,28 @@ +# 
Regions based on: /home/alen/isca2022/looppoint/tools/sde-external-9.0.0-2021-11-07-lin/pinplay-scripts/pcregions.py --label_file t.labels --warmup_factor 2 --tid global --bbv_file t.bb --region_file t.simpoints --weight_file t.weights + +# comment,thread-id,region-id,start-pc, start-image-name, start-image-offset, start-pc-count,end-pc, end-image-name, end-image-offset, end-pc-count,end-pc-relative-count, region-length, region-weight, region-multiplier, region-type + +# RegionId = 1 Slice = 27 Icount = 2160042521 Length = 79958388 Weight = 0.04651 Multiplier = 4.000 ClusterSlicecount = 4 ClusterIcount = 320005167 +#Start: pc : 0x4069d0 image: matrix-omp offset: 0x69d0 absolute_count: 211076617 source-info: matrix-omp.cpp:75 +#End: pc : 0x4069d0 image: matrix-omp offset: 0x69d0 absolute_count: 219060252 relative_count: 1060676.0 source-info: matrix-omp.cpp:75 +cluster 0 from slice 27,global,1,0x4069d0,matrix-omp,0x69d0,211076617,0x4069d0,matrix-omp,0x69d0,219060252,1060676,79958388,0.04651,4.000,simulation + +# RegionId = 2 Slice = 52 Icount = 4160001603 Length = 80000011 Weight = 0.05814 Multiplier = 5.001 ClusterSlicecount = 5 ClusterIcount = 400040399 +#Start: pc : 0x4069d0 image: matrix-omp offset: 0x69d0 absolute_count: 407294228 source-info: matrix-omp.cpp:75 +#End: pc : 0x4069d0 image: matrix-omp offset: 0x69d0 absolute_count: 415282447 relative_count: 1035231.0 source-info: matrix-omp.cpp:75 +cluster 1 from slice 52,global,2,0x4069d0,matrix-omp,0x69d0,407294228,0x4069d0,matrix-omp,0x69d0,415282447,1035231,80000011,0.05814,5.001,simulation + +# RegionId = 3 Slice = 24 Icount = 1920000792 Length = 80027459 Weight = 0.04651 Multiplier = 4.000 ClusterSlicecount = 4 ClusterIcount = 320021091 +#Start: pc : 0x4069d0 image: matrix-omp offset: 0x69d0 absolute_count: 187978221 source-info: matrix-omp.cpp:75 +#End: pc : 0x406880 image: matrix-omp offset: 0x6880 absolute_count: 23520614 relative_count: 144352.0 source-info: matrix-omp.cpp:95 +cluster 2 from slice 
24,global,3,0x4069d0,matrix-omp,0x69d0,187978221,0x406880,matrix-omp,0x6880,23520614,144352,80027459,0.04651,4.000,simulation + +# RegionId = 16 Slice = 50 Icount = 4000001542 Length = 160000061 WarmupFactor = 2 +#Start: pc : 0x406880 image: matrix-omp offset: 0x6880 absolute_count: 48111518 source-info: matrix-omp.cpp:95 +#End: pc : 0x4069d0 image: matrix-omp offset: 0x69d0 absolute_count: 407294228 relative_count: 2004125.0 source-info: matrix-omp.cpp:75 +Warmup for regionid 2,global,16,0x406880,matrix-omp,0x6880,48111518,0x4069d0,matrix-omp,0x69d0,407294228,2004125,160000061,0.00000,0.000,warmup:2 + +# First PC, 0x403050 +# Total instructions in 13 regions = 1039968792 +# Total instructions in workload = 6880002703 +# Total slices in workload = 86 diff --git a/tests/pyunit/stdlib/refs/output.json b/tests/pyunit/stdlib/refs/output.json new file mode 100644 index 0000000000..4324bac3de --- /dev/null +++ b/tests/pyunit/stdlib/refs/output.json @@ -0,0 +1,40 @@ +{ + "1": { + "simulation": { + "start": { + "pc": 4221392, + "global": 211076617, + "relative": 15326617 + }, + "end": { + "pc": 4221392, + "global": 219060252, + "relative": 23310252 + } + }, + "multiplier": 4.0, + "warmup": { + "start": { + "pc": 4221056, + "count": 23520614 + }, + "end": { + "pc": 4221392, + "count": 211076617 + } + } + }, + "2": { + "simulation": { + "start": { + "pc": 4221392, + "global": 407294228 + }, + "end": { + "pc": 4221392, + "global": 415282447 + } + }, + "multiplier": 5.001 + } +} From 29b19530ce6dbca9fa3d00b987cdde592e1ab0d0 Mon Sep 17 00:00:00 2001 From: "Bobby R. 
Bruce" Date: Wed, 8 Feb 2023 20:07:25 +0000 Subject: [PATCH 217/492] tests: Incorporate Looppoint example scripts into TestLib Change-Id: I97d89d3cc80ce8d8991ca8d3cb4aab8019324d76 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67758 Maintainer: Bobby Bruce Tested-by: kokoro Reviewed-by: Bobby Bruce --- .../test_gem5_library_examples.py | 56 ++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py index 9b5c2c67ff..7db46b0770 100644 --- a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py +++ b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 The Regents of the University of California +# Copyright (c) 2021-2023 The Regents of the University of California # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -324,3 +324,57 @@ gem5_verify_config( valid_hosts=constants.supported_hosts, length=constants.very_long_tag, ) + +gem5_verify_config( + name="test-gem5-library-create-looppoint-checkpoints", + fixtures=(), + verifiers=(), + config=joinpath( + config.base_dir, + "configs", + "example", + "gem5_library", + "looppoints", + "create-looppoint-checkpoint.py", + ), + config_args=[ + "--checkpoint-path", + joinpath(resource_path, "looppoint-checkpoint-save"), + ], + valid_isas=(constants.all_compiled_tag,), + valid_hosts=constants.supported_hosts, + length=constants.very_long_tag, +) + +for region in ( + "1", + "2", + "3", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", +): + gem5_verify_config( + name=f"test-gem5-library-restore-looppoint-checkpoint-region-f{region}", + fixtures=(), + verifiers=(), + config=joinpath( + config.base_dir, + "configs", + "example", + "gem5_library", + "looppoints", + "restore-looppoint-checkpoint.py", + ), + 
config_args=["--checkpoint-region", region], + valid_isas=(constants.all_compiled_tag,), + valid_hosts=constants.supported_hosts, + length=constants.very_long_tag, + ) From 4ad1150372aa1b3ecced8438c50b625ae6433f55 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Fri, 10 Feb 2023 15:15:42 +0000 Subject: [PATCH 218/492] stdlib: Add the LooppointCsvResource resource This resource wraps the LooppointCsvLoader class so it may be obtained as a specialized resource via gem5 resources. Relevant tests and config scripts have been updated. Change-Id: Ib8e5ff5500fb1560951c9c0110e3c3aec8ca3c42 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67857 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- .../create-looppoint-checkpoints.py | 12 ++-------- src/python/gem5/resources/resource.py | 22 +++++++++++++++++++ .../pyunit_resource_specialization.py | 21 ++++++++++++++++++ .../refs/resource-specialization.json | 9 ++++++++ 4 files changed, 54 insertions(+), 10 deletions(-) diff --git a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py index f967aa56e4..6c23d38c7d 100644 --- a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py +++ b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py @@ -61,7 +61,7 @@ from pathlib import Path from gem5.simulate.exit_event_generators import ( looppoint_save_checkpoint_generator, ) -from gem5.resources.looppoint import LooppointCsvLoader + import argparse requires(isa_required=ISA.X86) @@ -103,15 +103,6 @@ processor = SimpleProcessor( num_cores=9, ) -looppoint = LooppointCsvLoader( - # Pass in the LoopPoint data file - looppoint_file=Path( - obtain_resource( - "x86-matrix-multiply-omp-100-8-global-pinpoints" - ).get_local_path() - ) -) - board = SimpleBoard( clk_freq="3GHz", processor=processor, @@ -119,6 +110,7 @@ board = SimpleBoard( cache_hierarchy=cache_hierarchy, ) 
+looppoint = obtain_resource("x86-matrix-multiply-omp-100-8-global-pinpoints") board.set_se_looppoint_workload( binary=obtain_resource("x86-matrix-multiply-omp"), arguments=[100, 8], diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py index 678497eaa7..0cf58800f2 100644 --- a/src/python/gem5/resources/resource.py +++ b/src/python/gem5/resources/resource.py @@ -31,6 +31,7 @@ from m5.util import warn, fatal from .downloader import get_resource, get_resources_json_obj +from .looppoint import LooppointCsvLoader from ..isas import ISA, get_isa_from_str from typing import Optional, Dict, Union, Type, Tuple, List @@ -394,6 +395,26 @@ class SimpointResource(AbstractResource): return warmup_list +class LooppointCsvResource(FileResource, LooppointCsvLoader): + """This Looppoint resource used to create a Looppoint resource from a + pinpoints CSV file""" + + def __init__( + self, + local_path: str, + documentation: Optional[str] = None, + source: Optional[str] = None, + **kwargs, + ): + FileResource.__init__( + self, + local_path=local_path, + documentation=documentation, + source=source, + ) + LooppointCsvLoader.__init__(self, pinpoints_file=Path(local_path)) + + class SimpointDirectoryResource(SimpointResource): """A Simpoint diretory resource. 
This Simpoint Resource assumes the existance of a directory containing a simpoint file and a weight file.""" @@ -714,4 +735,5 @@ _get_resource_json_type_map = { "simpoint": SimpointResource, "simpoint-directory": SimpointDirectoryResource, "resource": Resource, + "looppoint-pinpoint-csv": LooppointCsvResource, } diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py index f31e35d719..5c60eb5c4a 100644 --- a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py +++ b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py @@ -29,6 +29,7 @@ import unittest from pathlib import Path from gem5.resources.resource import * +from gem5.resources.looppoint import LooppointCsvLoader from gem5.isas import ISA @@ -235,3 +236,23 @@ class ResourceSpecializationSuite(unittest.TestCase): "directory-example documentation.", resource.get_documentation() ) self.assertIsNone(resource.get_source()) + + def test_looppoint_pinpoints_resource(self) -> None: + """Tests the creation of LooppointCreatorCSVResource via a Looppoint + pinpoints csv file.""" + + resource = obtain_resource( + resource_name="looppoint-pinpoint-csv-resource", + resource_directory=self.get_resource_dir(), + ) + + self.assertIsInstance(resource, LooppointCsvResource) + + # The LooppointCreatorCSVResource should be a subtype of + # LooppointCsvLoader. 
+ self.assertIsInstance(resource, LooppointCsvLoader) + + self.assertEquals( + "A looppoint pinpoints csv file.", resource.get_documentation() + ) + self.assertIsNone(resource.get_source()) diff --git a/tests/pyunit/stdlib/resources/refs/resource-specialization.json b/tests/pyunit/stdlib/resources/refs/resource-specialization.json index 01671b564b..bfe0d4a448 100644 --- a/tests/pyunit/stdlib/resources/refs/resource-specialization.json +++ b/tests/pyunit/stdlib/resources/refs/resource-specialization.json @@ -108,6 +108,15 @@ "warmup_interval": 23445, "simpoint_list" : [2,3,4,15], "weight_list" : [0.1, 0.2, 0.4, 0.3] + }, + { + "type": "looppoint-pinpoint-csv", + "name": "looppoint-pinpoint-csv-resource", + "documentation" : "A looppoint pinpoints csv file.", + "is_zipped" : false, + "md5sum" : "199ab22dd463dc70ee2d034bfe045082", + "url": "http://dist.gem5.org/dist/develop/pinpoints/x86-matrix-multiply-omp-100-8-global-pinpoints-20230127", + "source" : null } ] } From 5d0dd10cfa04967d8e6b9814f03a09b4b901a362 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Fri, 10 Feb 2023 21:01:34 +0000 Subject: [PATCH 219/492] stdlib: Add LooppointJsonResource resource This resource wraps the LooppointJsonLoader class for use with gem5 resources. 
Change-Id: Ic00d689c289330bab8564abc4c68a9047d5096e0 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67858 Reviewed-by: Bobby Bruce Tested-by: kokoro Maintainer: Bobby Bruce --- src/python/gem5/resources/resource.py | 23 +++++++++++++++- .../pyunit_resource_specialization.py | 27 ++++++++++++++++++- .../refs/resource-specialization.json | 10 +++++++ 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py index 0cf58800f2..9cba9cf88f 100644 --- a/src/python/gem5/resources/resource.py +++ b/src/python/gem5/resources/resource.py @@ -31,7 +31,7 @@ from m5.util import warn, fatal from .downloader import get_resource, get_resources_json_obj -from .looppoint import LooppointCsvLoader +from .looppoint import LooppointCsvLoader, LooppointJsonLoader from ..isas import ISA, get_isa_from_str from typing import Optional, Dict, Union, Type, Tuple, List @@ -415,6 +415,26 @@ class LooppointCsvResource(FileResource, LooppointCsvLoader): LooppointCsvLoader.__init__(self, pinpoints_file=Path(local_path)) +class LooppointJsonResource(FileResource, LooppointJsonLoader): + def __init__( + self, + local_path: str, + region_id: Optional[Union[str, int]] = None, + documentation: Optional[str] = None, + source: Optional[str] = None, + **kwargs, + ): + FileResource.__init__( + self, + local_path=local_path, + documentation=documentation, + source=source, + ) + LooppointJsonLoader.__init__( + self, looppoint_file=local_path, region_id=region_id + ) + + class SimpointDirectoryResource(SimpointResource): """A Simpoint diretory resource. 
This Simpoint Resource assumes the existance of a directory containing a simpoint file and a weight file.""" @@ -736,4 +756,5 @@ _get_resource_json_type_map = { "simpoint-directory": SimpointDirectoryResource, "resource": Resource, "looppoint-pinpoint-csv": LooppointCsvResource, + "looppoint-json": LooppointJsonResource, } diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py index 5c60eb5c4a..660bf5f38f 100644 --- a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py +++ b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py @@ -29,7 +29,12 @@ import unittest from pathlib import Path from gem5.resources.resource import * -from gem5.resources.looppoint import LooppointCsvLoader + +from gem5.resources.looppoint import ( + LooppointCsvLoader, + LooppointJsonLoader, +) + from gem5.isas import ISA @@ -256,3 +261,23 @@ class ResourceSpecializationSuite(unittest.TestCase): "A looppoint pinpoints csv file.", resource.get_documentation() ) self.assertIsNone(resource.get_source()) + + def test_looppoint_json_restore_resource(self) -> None: + """Tests the creation of LooppointJsonResource via a + Looppoint JSON file.""" + + resource = obtain_resource( + resource_name="looppoint-json-restore-resource-region-1", + resource_directory=self.get_resource_dir(), + ) + + self.assertIsInstance(resource, LooppointJsonResource) + self.assertIsInstance(resource, LooppointJsonLoader) + + self.assertEquals(1, len(resource.get_regions())) + self.assertTrue("1" in resource.get_regions()) + + self.assertEquals( + "A looppoint json file resource.", resource.get_documentation() + ) + self.assertIsNone(resource.get_source()) diff --git a/tests/pyunit/stdlib/resources/refs/resource-specialization.json b/tests/pyunit/stdlib/resources/refs/resource-specialization.json index bfe0d4a448..c4d5eb4714 100644 --- a/tests/pyunit/stdlib/resources/refs/resource-specialization.json +++ 
b/tests/pyunit/stdlib/resources/refs/resource-specialization.json @@ -117,6 +117,16 @@ "md5sum" : "199ab22dd463dc70ee2d034bfe045082", "url": "http://dist.gem5.org/dist/develop/pinpoints/x86-matrix-multiply-omp-100-8-global-pinpoints-20230127", "source" : null + }, + { + "type": "looppoint-json", + "name": "looppoint-json-restore-resource-region-1", + "documentation" : "A looppoint json file resource.", + "is_zipped" : false, + "region_id" : "1", + "md5sum" : "a71ed64908b082ea619b26b940a643c1", + "url": "http://dist.gem5.org/dist/develop/looppoints/x86-matrix-multiply-omp-100-8-looppoint-json-20230128", + "source" : null } ] } From ce516397dac6171ba06ffdd0bb8a581647ec321a Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Wed, 22 Feb 2023 08:41:11 -0800 Subject: [PATCH 220/492] configs stdlib: Update checkpoint resource for riscv-hello This change updates the riscv-hello-restore-checkpoint.py script's checkpoint for one compatible with v23 Change-Id: Idee262491db45049d9afe69190bc8890d75c8cdf Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68337 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- .../gem5_library/checkpoints/riscv-hello-restore-checkpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py b/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py index e112b76ddb..60a7dd0f59 100644 --- a/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py +++ b/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py @@ -90,7 +90,7 @@ board = SimpleBoard( board.set_se_binary_workload( # the workload should be the same as the save-checkpoint script Resource("riscv-hello"), - checkpoint=Resource("riscv-hello-example-checkpoint-v22-1"), + checkpoint=Resource("riscv-hello-example-checkpoint-v23"), ) simulator = Simulator( From 55348d062c9ff36538c9fee0c2e0867e0ccef4d9 Mon Sep 17 
00:00:00 2001 From: "Bobby R. Bruce" Date: Wed, 22 Feb 2023 17:03:04 -0800 Subject: [PATCH 221/492] configs,stdlib: Update simpoint-se-restore checkpoint This patch fixes the checkpoint resource for the simpoints-se-restore.py script. Change-Id: I29698844023c54fdc645c99da4a19c77bae58729 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68338 Reviewed-by: Bobby Bruce Tested-by: kokoro Maintainer: Bobby Bruce --- .../example/gem5_library/checkpoints/simpoints-se-restore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/example/gem5_library/checkpoints/simpoints-se-restore.py b/configs/example/gem5_library/checkpoints/simpoints-se-restore.py index 5ff82dba04..d063c143a7 100644 --- a/configs/example/gem5_library/checkpoints/simpoints-se-restore.py +++ b/configs/example/gem5_library/checkpoints/simpoints-se-restore.py @@ -119,7 +119,7 @@ board.set_se_simpoint_workload( weight_list=[0.1, 0.2, 0.4, 0.3], warmup_interval=1000000, ), - checkpoint=obtain_resource("simpoints-se-checkpoints-v22-1-v2"), + checkpoint=obtain_resource("simpoints-se-checkpoints-v23-0-v1"), ) From b4b024808ec21b006155f04852eb3f93877b2de9 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Wed, 22 Feb 2023 03:17:09 -0800 Subject: [PATCH 222/492] stdlib: Fix checkpoint setting through set_workload func Due to a typo ('checkpoint_dir' instead of 'checkpoint'), setting checkpoints via the set_workload functions was not working. This patch fixes this error. 
Change-Id: I5720406f2a01f166666e80079c1f84651f750fe2 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68277 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/python/gem5/components/boards/se_binary_workload.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py index dc5425754e..2558ce3cf2 100644 --- a/src/python/gem5/components/boards/se_binary_workload.py +++ b/src/python/gem5/components/boards/se_binary_workload.py @@ -111,16 +111,16 @@ class SEBinaryWorkload: # Set whether to exit on work items for the se_workload self.exit_on_work_items = exit_on_work_items - # Here we set `self._checkpoint_dir`. This is then used by the + # Here we set `self._checkpoint`. This is then used by the # Simulator module to setup checkpoints. if checkpoint: if isinstance(checkpoint, Path): self._checkpoint = checkpoint elif isinstance(checkpoint, AbstractResource): - self._checkpoint_dir = Path(checkpoint.get_local_path()) + self._checkpoint = Path(checkpoint.get_local_path()) else: raise Exception( - "The checkpoint_dir must be None, Path, or " + "The checkpoint must be None, Path, or " "AbstractResource." ) From 3bb19be083d38249d1e1652184d05b0e6406a660 Mon Sep 17 00:00:00 2001 From: "Bobby R. 
Bruce" Date: Sun, 19 Feb 2023 13:44:04 +0000 Subject: [PATCH 223/492] configs,stdlib: Add Workloads to Looppoint examples Change-Id: I6a0eebb127ad8a6796c96390594868668424c9b4 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68117 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- .../create-looppoint-checkpoints.py | 14 ++++------- .../restore-looppoint-checkpoint.py | 23 +++++-------------- 2 files changed, 10 insertions(+), 27 deletions(-) diff --git a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py index 6c23d38c7d..abb15fb7f8 100644 --- a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py +++ b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py @@ -56,7 +56,7 @@ from gem5.components.memory.single_channel import SingleChannelDDR3_1600 from gem5.components.processors.simple_processor import SimpleProcessor from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA -from gem5.resources.resource import obtain_resource +from gem5.resources.workload import Workload from pathlib import Path from gem5.simulate.exit_event_generators import ( looppoint_save_checkpoint_generator, @@ -110,13 +110,7 @@ board = SimpleBoard( cache_hierarchy=cache_hierarchy, ) -looppoint = obtain_resource("x86-matrix-multiply-omp-100-8-global-pinpoints") -board.set_se_looppoint_workload( - binary=obtain_resource("x86-matrix-multiply-omp"), - arguments=[100, 8], - # Pass LoopPoint module into the board - looppoint=looppoint, -) +board.set_workload(Workload("x86-matrix-multiply-omp-100-8-looppoint-csv")) dir = Path(args.checkpoint_path) dir.mkdir(exist_ok=True) @@ -126,7 +120,7 @@ simulator = Simulator( on_exit_event={ ExitEvent.SIMPOINT_BEGIN: looppoint_save_checkpoint_generator( checkpoint_dir=dir, - looppoint=looppoint, + looppoint=board.get_looppoint(), # True if the relative PC count pairs 
should be updated during the # simulation. Default as True. update_relatives=True, @@ -141,4 +135,4 @@ simulator = Simulator( simulator.run() # Output the JSON file -looppoint.output_json_file() +board.get_looppoint().output_json_file() diff --git a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py index c54fdabca1..21353a34a1 100644 --- a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py +++ b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py @@ -54,8 +54,7 @@ from gem5.components.processors.simple_processor import SimpleProcessor from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA from gem5.resources.resource import obtain_resource -from pathlib import Path -from gem5.utils.resource import LooppointJsonLoader +from gem5.resources.workload import Workload from m5.stats import reset, dump requires(isa_required=ISA.X86) @@ -113,24 +112,17 @@ board = SimpleBoard( cache_hierarchy=cache_hierarchy, ) -looppoint = LooppointJsonLoader( - looppoint_file=Path( - obtain_resource( - "x86-matrix-multiply-omp-100-8-looppoint" - ).get_local_path() - ), - region_id=args.checkpoint_region, -) - -board.set_se_looppoint_workload( - binary=obtain_resource("x86-matrix-multiply-omp"), looppoint=looppoint +board.set_workload( + Workload( + f"x86-matrix-multiply-omp-100-8-looppoint-region-{args.checkpoint_region}" + ) ) # This generator will dump the stats and exit the simulation loop when the # simulation region reaches its end. In the case there is a warmup interval, # the simulation stats are reset after the warmup is complete. def reset_and_dump(): - if len(looppoint.get_targets()) > 1: + if len(board.get_looppoint().get_targets()) > 1: print("Warmup region ended. 
Resetting stats.") reset() yield False @@ -141,9 +133,6 @@ def reset_and_dump(): simulator = Simulator( board=board, - checkpoint_path=obtain_resource( - f"x86-matrix-multiply-omp-100-8-looppoint-checkpoint-region-{args.checkpoint_region}" - ).get_local_path(), on_exit_event={ExitEvent.SIMPOINT_BEGIN: reset_and_dump()}, ) From 9fb5ce5cd3a425dffcde18caed36f428afdf3cbd Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Tue, 21 Feb 2023 11:45:44 +0800 Subject: [PATCH 224/492] arch-riscv,dev: Fix behavior issues of PLIC 1. Fix reserved size between enable memory map and threshold memory map. The number of enablePadding should be the number of context in PLIC 2. writePriority to memory should update Change-Id: Ib4b7e5ecd183863e140c4f3382a75057902d446d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68198 Reviewed-by: Ayaz Akram Tested-by: kokoro Reviewed-by: Yu-hsin Wang Maintainer: Jason Lowe-Power --- src/dev/riscv/plic.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/dev/riscv/plic.cc b/src/dev/riscv/plic.cc index b8f765a17c..371af9e78a 100644 --- a/src/dev/riscv/plic.cc +++ b/src/dev/riscv/plic.cc @@ -203,7 +203,7 @@ Plic::PlicRegisters::init() - plic->nSrc32 * 4; reserved.emplace_back("reserved1", reserve1_size); const size_t reserve2_size = thresholdStart - enableStart - - plic->nSrc32 * plic->nContext * enablePadding; + - plic->nContext * enablePadding; reserved.emplace_back("reserved2", reserve2_size); const size_t reserve3_size = plic->pioSize - thresholdStart - plic->nContext * thresholdPadding; @@ -333,6 +333,8 @@ void Plic::writeThreshold(Register32& reg, const uint32_t& data, const int context_id) { + reg.update(data); + DPRINTF(Plic, "Threshold updated - context: %d, val: %d\n", context_id, reg.get()); From 379da2474b42c6aedfcc0a34367d41206efe1d4e Mon Sep 17 00:00:00 2001 From: Jason Lowe-Power Date: Tue, 14 Feb 2023 10:48:38 -0800 Subject: [PATCH 225/492] cpu: Add fatal in BaseCPU for wrong workloads The CPU 
models assume that the number of workloads (Processes) is equal to the number of threads when using SE mode. This wasn't checked, leading to a segfault if there were no workloads. This change makes the error more clear. Change-Id: I9a7b21112b8f819c6eeca944ee0d73ae9ce9a57b Signed-off-by: Jason Lowe-Power Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67937 Reviewed-by: Ayaz Akram Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/cpu/base.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 60d443af8c..d2c0a78d44 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -182,6 +182,12 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) "of threads (%i).\n", params().isa.size(), numThreads); } + if (!FullSystem && params().workload.size() != numThreads) { + fatal("Number of processes (cpu.workload) (%i) assigned to the CPU " + "does not equal number of threads (%i).\n", + params().workload.size(), numThreads); + } + modelResetPort.onChange([this](const bool &new_val) { setReset(new_val); }); From 30200051a300beb36e6e7842c93576771420a35a Mon Sep 17 00:00:00 2001 From: Jason Lowe-Power Date: Wed, 15 Feb 2023 18:23:24 -0800 Subject: [PATCH 226/492] arch-x86,sim-se: Ignore some mem syscalls This makes the Linux SE mode ignore mlockall and modify_ldt. It is needed to get ELFies working.
Change-Id: I9fce3c6a5531e5f1bb094c2d0587fa330d2892a9 Signed-off-by: Jason Lowe-Power Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68037 Maintainer: Jason Lowe-Power Reviewed-by: Bobby Bruce Tested-by: kokoro --- src/arch/x86/linux/syscall_tbl64.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arch/x86/linux/syscall_tbl64.cc b/src/arch/x86/linux/syscall_tbl64.cc index 1e7274cc42..26299d884b 100644 --- a/src/arch/x86/linux/syscall_tbl64.cc +++ b/src/arch/x86/linux/syscall_tbl64.cc @@ -194,10 +194,10 @@ SyscallDescTable EmuLinux::syscallDescs64 = { { 148, "sched_rr_get_interval" }, { 149, "mlock" }, { 150, "munlock" }, - { 151, "mlockall" }, + { 151, "mlockall", ignoreFunc }, { 152, "munlockall" }, { 153, "vhangup" }, - { 154, "modify_ldt" }, + { 154, "modify_ldt", ignoreFunc }, { 155, "pivot_root" }, { 156, "_sysctl" }, { 157, "prctl", ignoreFunc }, From cd35c9a6194451952735f58fb09cb7983e5861ba Mon Sep 17 00:00:00 2001 From: Jason Lowe-Power Date: Fri, 17 Feb 2023 17:11:36 -0800 Subject: [PATCH 227/492] stdlib: Add support for ELFies This enables the stdlib to load and run ELFie-based binaries See https://github.com/intel/pinball2elf for more details on ELFies Change-Id: Ic1b624df64da1c77afc0907257a9e989488912ec Signed-off-by: Jason Lowe-Power Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68038 Reviewed-by: Bobby Bruce Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/python/SConscript | 1 + .../components/boards/se_binary_workload.py | 32 +++++++++ src/python/gem5/resources/elfie.py | 67 +++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 src/python/gem5/resources/elfie.py diff --git a/src/python/SConscript b/src/python/SConscript index f401c03468..900723b0cf 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -263,6 +263,7 @@ PySource('gem5.resources', 'gem5/resources/md5_utils.py') PySource('gem5.resources', 'gem5/resources/resource.py') 
PySource('gem5.resources', 'gem5/resources/workload.py') PySource('gem5.resources', 'gem5/resources/looppoint.py') +PySource('gem5.resources', 'gem5/resources/elfie.py') PySource('gem5.utils', 'gem5/utils/__init__.py') PySource('gem5.utils', 'gem5/utils/filelock.py') PySource('gem5.utils', 'gem5/utils/override.py') diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py index 2558ce3cf2..23be81b03d 100644 --- a/src/python/gem5/components/boards/se_binary_workload.py +++ b/src/python/gem5/components/boards/se_binary_workload.py @@ -35,6 +35,7 @@ from ...resources.resource import ( SimpointDirectoryResource, ) +from gem5.resources.elfie import ELFieInfo from gem5.resources.looppoint import Looppoint from m5.objects import SEWorkload, Process @@ -210,6 +211,37 @@ class SEBinaryWorkload: checkpoint=checkpoint, ) + def set_se_elfie_workload( + self, + elfie: AbstractResource, + elfie_info: ELFieInfo, + arguments: List[str] = [], + checkpoint: Optional[Union[Path, AbstractResource]] = None, + ) -> None: + """Set up the system to run a ELFie workload. + + **Limitations** + * Dynamically linked executables are partially supported when the host + ISA and the simulated ISA are the same. + + :param elfie: The resource encapsulating the binary elfie to be run. + :param elfie_info: The ELFieInfo object that contain all the + information for the ELFie + :param arguments: The input arguments for the binary + """ + + assert isinstance(elfie_info, ELFieInfo) + self._elfie_info_object = elfie_info + + self._elfie_info_object.setup_processor(self.get_processor()) + + # Call set_se_binary_workload after LoopPoint setup is complete + self.set_se_binary_workload( + binary=elfie, + arguments=arguments, + checkpoint=checkpoint, + ) + def get_looppoint(self) -> Looppoint: """ Returns the LoopPoint object set. 
If no LoopPoint object has been set diff --git a/src/python/gem5/resources/elfie.py b/src/python/gem5/resources/elfie.py new file mode 100644 index 0000000000..ae51388d62 --- /dev/null +++ b/src/python/gem5/resources/elfie.py @@ -0,0 +1,67 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from m5.params import PcCountPair +from m5.objects import PcCountTrackerManager + +from typing import List + + +class ELFieInfo: + """Stores information to load/run ELFies + + See https://github.com/intel/pinball2elf for more information + """ + + def __init__(self, start: PcCountPair, end: PcCountPair): + self._start = start + self._end = end + self._manager = PcCountTrackerManager() + self._manager.targets = self.get_targets() + + def setup_processor( + self, + processor: "AbstractProcessor", + ) -> None: + """ + A function is used to setup a PC tracker in all the cores and + connect all the tracker to the PC tracker manager to perform + multithread PC tracking. + :param processor: The processor used in the simulation configuration. + """ + for core in processor.get_cores(): + core.add_pc_tracker_probe(self.get_targets(), self.get_manager()) + + def get_targets(self) -> List[PcCountPair]: + """Returns the complete list of targets PcCountPairs. That is, the + PcCountPairs each region starts with as well as the relevant warmup + intervals.""" + return [self._start, self._end] + + def get_manager(self) -> PcCountTrackerManager: + """Returns the PcCountTrackerManager for this ELFie data + structure.""" + return self._manager From a6048f2fe209f29c07238d0fca406fe0de82585e Mon Sep 17 00:00:00 2001 From: Jason Lowe-Power Date: Thu, 26 Jan 2023 18:00:24 -0800 Subject: [PATCH 228/492] stdlib: Add progress bars for long functions This adds a progress bar for downloading large files and computing md5sums on large files. 
Change-Id: Iddc9faf61e861837cc1e2e3b3dbdbeebd6ccf529 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67472 Tested-by: kokoro Reviewed-by: Melissa Jost Maintainer: Jason Lowe-Power --- optional-requirements.txt | 1 + src/python/SConscript | 1 + src/python/gem5/resources/downloader.py | 25 ++++++-- src/python/gem5/resources/md5_utils.py | 19 ++++++- src/python/gem5/utils/progress_bar.py | 76 +++++++++++++++++++++++++ 5 files changed, 116 insertions(+), 6 deletions(-) create mode 100644 optional-requirements.txt create mode 100644 src/python/gem5/utils/progress_bar.py diff --git a/optional-requirements.txt b/optional-requirements.txt new file mode 100644 index 0000000000..f88787df1f --- /dev/null +++ b/optional-requirements.txt @@ -0,0 +1 @@ +tqdm==4.64.1 diff --git a/src/python/SConscript b/src/python/SConscript index 900723b0cf..b0f11ddc73 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -267,6 +267,7 @@ PySource('gem5.resources', 'gem5/resources/elfie.py') PySource('gem5.utils', 'gem5/utils/__init__.py') PySource('gem5.utils', 'gem5/utils/filelock.py') PySource('gem5.utils', 'gem5/utils/override.py') +PySource('gem5.utils', 'gem5/utils/progress_bar.py') PySource('gem5.utils', 'gem5/utils/requires.py') PySource('gem5.utils.multiprocessing', 'gem5/utils/multiprocessing/__init__.py') diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py index 4a2ed5d332..24b8970cc0 100644 --- a/src/python/gem5/resources/downloader.py +++ b/src/python/gem5/resources/downloader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 The Regents of the University of California +# Copyright (c) 2021-2023 The Regents of the University of California # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -42,6 +42,7 @@ from urllib.error import HTTPError from typing import List, Dict, Set, Optional from .md5_utils import md5_file, md5_dir +from ..utils.progress_bar import tqdm, progress_hook from ..utils.filelock import FileLock @@ -286,10 +287,26 @@ def _download(url: str, download_to: str, max_attempts: int = 6) -> None: # get the file as a bytes blob request = urllib.request.Request(url) with urllib.request.urlopen(request, context=ctx) as fr: - with open(download_to, "wb") as fw: - fw.write(fr.read()) + with tqdm.wrapattr( + open(download_to, "wb"), + "write", + miniters=1, + desc="Downloading {download_to}", + total=getattr(fr, "length", None), + ) as fw: + for chunk in fr: + fw.write(chunk) else: - urllib.request.urlretrieve(url, download_to) + with tqdm( + unit="B", + unit_scale=True, + unit_divisor=1024, + miniters=1, + desc=f"Downloading {download_to}", + ) as t: + urllib.request.urlretrieve( + url, download_to, reporthook=progress_hook(t) + ) return except HTTPError as e: # If the error code retrieved is retryable, we retry using a diff --git a/src/python/gem5/resources/md5_utils.py b/src/python/gem5/resources/md5_utils.py index d7212ab83f..f4a1a87df5 100644 --- a/src/python/gem5/resources/md5_utils.py +++ b/src/python/gem5/resources/md5_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 The Regents of the University of California +# Copyright (c) 2022-2023 The Regents of the University of California # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -31,7 +31,22 @@ from _hashlib import HASH as Hash def _md5_update_from_file(filename: Path, hash: Hash) -> Hash: assert filename.is_file() - with open(str(filename), "rb") as f: + + if filename.stat().st_size < 1024 * 1024 * 100: + from ..utils.progress_bar import FakeTQDM + + # if the file is less than 100MB, no need to show a progress bar. 
+ tqdm = FakeTQDM() + else: + from ..utils.progress_bar import tqdm + + with tqdm.wrapattr( + open(str(filename), "rb"), + "read", + miniters=1, + desc=f"Computing md5sum on {filename}", + total=filename.stat().st_size, + ) as f: for chunk in iter(lambda: f.read(4096), b""): hash.update(chunk) return hash diff --git a/src/python/gem5/utils/progress_bar.py b/src/python/gem5/utils/progress_bar.py new file mode 100644 index 0000000000..0ac13200b9 --- /dev/null +++ b/src/python/gem5/utils/progress_bar.py @@ -0,0 +1,76 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +class FakeTQDM: + """This is a fake wrapper so that the tqdm calls work whether or not it + has been installed. + """ + + def __call__(*args, **kwargs): + if args: + return args[0] + return kwargs.get("iterable", None) + + def wrapattr(self, *args, **kwargs): + if args: + return args[0] + return kwargs.get("iterable", None) + + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + +try: + from tqdm.auto import tqdm + + _have_tqdm = True +except ImportError: + tqdm = FakeTQDM() + _have_tqdm = False + +# Hook for the progress bar +def progress_hook(t): + if not _have_tqdm: + # Takes 3 arguments + return lambda a, b, c: None + + last_b = [0] + + def update_to(b=1, bsize=1, tsize=None): + if tsize not in (None, -1): + t.total = tsize + displayed = t.update((b - last_b[0]) * bsize) + last_b[0] = b + return displayed + + return update_to + + +__all__ = [tqdm, progress_hook, FakeTQDM] From 65a678c75b2536f2cb5069fdc54aaf0f10528955 Mon Sep 17 00:00:00 2001 From: Tom Rollet Date: Fri, 24 Feb 2023 16:29:37 +0100 Subject: [PATCH 229/492] cpu-o3: fix false positive in AddressSanitizer AddressSanitizer found a new-delete-type-mismatch because of the custom new operator for DynInst. Adding a custom delete operator for DynInstPtr fixes this issue. 
It has been fixed the same way in Mozilla: https://bugzilla.mozilla.org/show_bug.cgi?id=1391500 Change-Id: I0ab4cb6d79cac88069cc2374a1deb499cdb15f02 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68357 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/cpu/o3/dyn_inst.cc | 9 +++++++++ src/cpu/o3/dyn_inst.hh | 1 + 2 files changed, 10 insertions(+) diff --git a/src/cpu/o3/dyn_inst.cc b/src/cpu/o3/dyn_inst.cc index 0b9a900446..94433cf433 100644 --- a/src/cpu/o3/dyn_inst.cc +++ b/src/cpu/o3/dyn_inst.cc @@ -187,6 +187,15 @@ DynInst::operator new(size_t count, Arrays &arrays) return buf; } +// Because of the custom "new" operator that allocates more bytes than the +// size of the DynInst object, AddressSanitizer throw new-delete-type-mismatch. +// Adding a custom delete function is enough to shut down this false positive +void +DynInst::operator delete(void *ptr) +{ + ::operator delete(ptr); +} + DynInst::~DynInst() { /* diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index ab165bbcd5..54c0385374 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -95,6 +95,7 @@ class DynInst : public ExecContext, public RefCounted }; static void *operator new(size_t count, Arrays &arrays); + static void operator delete(void* ptr); /** BaseDynInst constructor given a binary instruction. 
*/ DynInst(const Arrays &arrays, const StaticInstPtr &staticInst, From 220995725616b2605692994f660a248ce6044aa2 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Tue, 21 Feb 2023 11:27:23 +0800 Subject: [PATCH 230/492] arch-riscv,dev: Add PLIC abstract class to support multiple PLIC implementation We should create PLIC abstract and have common interface to let HiFive platform send and clear interrupt to variable type of PLIC Change-Id: Ic3a2ffc2a2a002540b400c70c85c3495fa838f2a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68197 Maintainer: Jason Lowe-Power Reviewed-by: Yu-hsin Wang Tested-by: kokoro --- src/dev/riscv/Plic.py | 20 ++++++++++++++++++-- src/dev/riscv/SConscript | 3 ++- src/dev/riscv/plic.cc | 3 ++- src/dev/riscv/plic.hh | 22 +++++++++++++++++++--- 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/dev/riscv/Plic.py b/src/dev/riscv/Plic.py index 33b6940c3f..b4486b9350 100644 --- a/src/dev/riscv/Plic.py +++ b/src/dev/riscv/Plic.py @@ -1,4 +1,5 @@ # Copyright (c) 2021 Huawei International +# Copyright (c) 2023 Google LLC # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -39,7 +40,22 @@ from m5.proxy import * from m5.util.fdthelper import * -class Plic(BasicPioDevice): +class PlicBase(BasicPioDevice): + """ + This is abstract class of PLIC and + define interface to handle received + interrupt singal from device + """ + + type = "PlicBase" + cxx_header = "dev/riscv/plic.hh" + cxx_class = "gem5::PlicBase" + abstract = True + + pio_size = Param.Addr("PIO Size") + + +class Plic(PlicBase): """ This implementation of PLIC is based on the SiFive U54MC datasheet: @@ -51,7 +67,7 @@ class Plic(BasicPioDevice): type = "Plic" cxx_header = "dev/riscv/plic.hh" cxx_class = "gem5::Plic" - pio_size = Param.Addr(0x4000000, "PIO Size") + pio_size = 0x4000000 n_src = Param.Int("Number of interrupt sources") n_contexts = Param.Int( "Number of interrupt contexts. 
Usually the number " diff --git a/src/dev/riscv/SConscript b/src/dev/riscv/SConscript index af0b96b88e..6e3376bb02 100755 --- a/src/dev/riscv/SConscript +++ b/src/dev/riscv/SConscript @@ -2,6 +2,7 @@ # Copyright (c) 2021 Huawei International # Copyright (c) 2022 EXAscale Performance SYStems (EXAPSYS) +# Copyright (c) 2023 Google LLC # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -34,7 +35,7 @@ SimObject('HiFive.py', sim_objects=['HiFive', 'GenericRiscvPciHost'], SimObject('LupV.py', sim_objects=['LupV'], tags='riscv isa') SimObject('Clint.py', sim_objects=['Clint'], tags='riscv isa') SimObject('PlicDevice.py', sim_objects=['PlicIntDevice'], tags='riscv isa') -SimObject('Plic.py', sim_objects=['Plic'], tags='riscv isa') +SimObject('Plic.py', sim_objects=['PlicBase', 'Plic'], tags='riscv isa') SimObject('RTC.py', sim_objects=['RiscvRTC'], tags='riscv isa') SimObject('RiscvVirtIOMMIO.py', sim_objects=['RiscvMmioVirtIO'], tags='riscv isa') diff --git a/src/dev/riscv/plic.cc b/src/dev/riscv/plic.cc index 371af9e78a..fd42920dc5 100644 --- a/src/dev/riscv/plic.cc +++ b/src/dev/riscv/plic.cc @@ -45,6 +45,7 @@ #include "mem/packet.hh" #include "mem/packet_access.hh" #include "params/Plic.hh" +#include "params/PlicBase.hh" #include "sim/system.hh" namespace gem5 @@ -53,7 +54,7 @@ namespace gem5 using namespace RiscvISA; Plic::Plic(const Params &params) : - BasicPioDevice(params, params.pio_size), + PlicBase(params), system(params.system), nSrc(params.n_src), nContext(params.n_contexts), diff --git a/src/dev/riscv/plic.hh b/src/dev/riscv/plic.hh index d077e73617..00128ee56c 100644 --- a/src/dev/riscv/plic.hh +++ b/src/dev/riscv/plic.hh @@ -1,5 +1,6 @@ /* * Copyright (c) 2021 Huawei International + * Copyright (c) 2023 Google LLC * All rights reserved * * The license below extends only to copyright in the software and shall @@ -47,6 +48,7 @@ #include "mem/packet.hh" #include "mem/packet_access.hh" #include "params/Plic.hh" 
+#include "params/PlicBase.hh" #include "sim/system.hh" namespace gem5 @@ -94,7 +96,21 @@ struct PlicOutput std::vector maxPriority; }; -class Plic : public BasicPioDevice +class PlicBase : public BasicPioDevice +{ + public: + typedef PlicBaseParams Params; + PlicBase(const Params &params) : + BasicPioDevice(params, params.pio_size) + {} + + // Interrupt interface to send signal to PLIC + virtual void post(int src_id) = 0; + // Interrupt interface to clear signal to PLIC + virtual void clear(int src_id) = 0; +}; + +class Plic : public PlicBase { // Params protected: @@ -125,8 +141,8 @@ class Plic : public BasicPioDevice /** * Interrupt interface */ - void post(int src_id); - void clear(int src_id); + void post(int src_id) override; + void clear(int src_id) override; /** * SimObject functions From e6604bf1097ea7af4bfc54dbda9c2db1b5561ed8 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Tue, 21 Feb 2023 12:02:37 +0800 Subject: [PATCH 231/492] arch-riscv,dev: Add HiFive Base Platform This is basic abstract platform and all of RISC-V system should use platform inherit from HiFiveBase, HiFiveBase declared the common way to handle interrupt. Change-Id: I52122e1c82c200d7e6012433c2535c07d427f637 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68199 Maintainer: Jason Lowe-Power Reviewed-by: Yu-hsin Wang Tested-by: kokoro --- src/dev/riscv/HiFive.py | 154 ++++++++++++++++++++++----------------- src/dev/riscv/SConscript | 2 +- src/dev/riscv/hifive.cc | 19 +++-- src/dev/riscv/hifive.hh | 11 +-- 4 files changed, 107 insertions(+), 79 deletions(-) diff --git a/src/dev/riscv/HiFive.py b/src/dev/riscv/HiFive.py index 466968602b..5bd6363363 100755 --- a/src/dev/riscv/HiFive.py +++ b/src/dev/riscv/HiFive.py @@ -1,5 +1,6 @@ # Copyright (c) 2021 Huawei International # Copyright (c) 2022 EXAscale Performance SYStems (EXAPSYS) +# Copyright (c) 2023 Google LLC # All rights reserved. 
# # The license below extends only to copyright in the software and shall @@ -63,24 +64,14 @@ class GenericRiscvPciHost(GenericPciHost): _dma_coherent = True -class HiFive(Platform): - """HiFive Platform +class HiFiveBase(Platform): + """HiFive Base Abstract Platform Implementation: This is the base class for SiFive's HiFive board series. It contains the CLINT and PLIC interrupt controllers, Uart and Disk. - Implementation details are based on SiFive - FU540-C000. https://sifive.cdn.prismic.io/ - sifive/b5e7a29c-d3c2-44ea-85fb-acc1df282e2 - 1_FU540-C000-v1p3.pdf - - Setup: - The following sections outline the required - setup for a RISC-V HiFive platform. See - configs/example/riscv/fs_linux.py for example. - Driving CLINT: CLINT has an interrupt pin which increments mtime. It can be connected to any interrupt @@ -88,7 +79,7 @@ class HiFive(Platform): abstract RTC wrapper called RiscvRTC can be used. - Attaching PLIC devices: + Driving PLIC: PLIC handles external interrupts. Interrupt PioDevices should inherit from PlicIntDevice (PCI and DMA not yet implemented). It contains @@ -96,63 +87,30 @@ class HiFive(Platform): to call platform->postPciInt(id). All PLIC interrupt devices should be returned - by _off_chip_devices(). Calling attachPlic sets - up the PLIC interrupt source count. - - Uart: - The HiFive platform also has an uart_int_id. - This is because Uart8250 uses postConsoleInt - instead of postPciInt. In the future if a Uart - that inherits PlicIntDevice is implemented, - this can be removed. - - Disk: - See fs_linux.py for setup example. - - PMAChecker: - The PMAChecker will be attached to the MMU of - each CPU (which allows them to differ). See - fs_linux.py for setup example. + by _off_chip_devices(). 
""" - type = "HiFive" + type = "HiFiveBase" cxx_header = "dev/riscv/hifive.hh" - cxx_class = "gem5::HiFive" + cxx_class = "gem5::HiFiveBase" # CLINT - clint = Param.Clint(Clint(pio_addr=0x2000000), "CLINT") + clint = Param.Clint(NULL, "CLINT") # PLIC - plic = Param.Plic(Plic(pio_addr=0xC000000), "PLIC") + plic = Param.PlicBase(NULL, "PLIC") - # PCI - pci_host = GenericRiscvPciHost( - conf_base=0x30000000, - conf_size="256MB", - conf_device_bits=12, - pci_pio_base=0x2F000000, - pci_mem_base=0x40000000, - ) - - # Uart - uart = RiscvUart8250(pio_addr=0x10000000) # Int source ID to redirect console interrupts to # Set to 0 if using a pci interrupt for Uart instead - uart_int_id = Param.Int(0xA, "PLIC Uart interrupt ID") - terminal = Terminal() + uart_int_id = Param.Int(0, "PLIC Uart interrupt ID") def _on_chip_devices(self): """Returns a list of on-chip peripherals""" - return [self.clint, self.plic] + return [] def _off_chip_devices(self): """Returns a list of off-chip peripherals""" - devices = [self.uart] - if hasattr(self, "disk"): - devices.append(self.disk) - if hasattr(self, "rng"): - devices.append(self.rng) - return devices + return [] def _on_chip_ranges(self): """Returns a list of on-chip peripherals @@ -172,17 +130,6 @@ class HiFive(Platform): for dev in self._off_chip_devices() ] - def attachPlic(self): - """Count number of PLIC interrupt sources""" - plic_srcs = [ - self.uart_int_id, - self.pci_host.int_base + self.pci_host.int_count, - ] - for device in self._off_chip_devices(): - if hasattr(device, "interrupt_id"): - plic_srcs.append(device.interrupt_id) - self.plic.n_src = max(plic_srcs) + 1 - def attachOnChipIO(self, bus): """Attach on-chip IO devices, needs modification to support DMA @@ -197,6 +144,83 @@ class HiFive(Platform): for device in self._off_chip_devices(): device.pio = bus.mem_side_ports + +class HiFive(HiFiveBase): + """HiFive Platform + + Implementation: + Implementation details are based on SiFive + FU540-C000. 
https://sifive.cdn.prismic.io/ + sifive/b5e7a29c-d3c2-44ea-85fb-acc1df282e2 + 1_FU540-C000-v1p3.pdf + + Setup: + The following sections outline the required + setup for a RISC-V HiFive platform. See + configs/example/riscv/fs_linux.py for example. + + Uart: + The HiFive platform also has an uart_int_id. + This is because Uart8250 uses postConsoleInt + instead of postPciInt. In the future if a Uart + that inherits PlicIntDevice is implemented, + this can be removed. + + Disk: + See fs_linux.py for setup example. + + PMAChecker: + The PMAChecker will be attached to the MMU of + each CPU (which allows them to differ). See + fs_linux.py for setup example. + """ + + # CLINT + clint = Clint(pio_addr=0x2000000) + + # PLIC + plic = Plic(pio_addr=0xC000000) + + # PCI + pci_host = GenericRiscvPciHost( + conf_base=0x30000000, + conf_size="256MB", + conf_device_bits=12, + pci_pio_base=0x2F000000, + pci_mem_base=0x40000000, + ) + + # Uart + uart = RiscvUart8250(pio_addr=0x10000000) + # Int source ID to redirect console interrupts to + # Set to 0 if using a pci interrupt for Uart instead + uart_int_id = 0xA + terminal = Terminal() + + def _on_chip_devices(self): + """Returns a list of on-chip peripherals""" + return [self.clint, self.plic] + + def _off_chip_devices(self): + """Returns a list of off-chip peripherals""" + devices = [self.uart] + if hasattr(self, "disk"): + devices.append(self.disk) + if hasattr(self, "rng"): + devices.append(self.rng) + return devices + + def attachPlic(self): + """Count and set number of PLIC interrupt sources""" + plic_srcs = [ + self.uart_int_id, + self.pci_host.int_base + self.pci_host.int_count, + ] + for device in self._off_chip_devices(): + if hasattr(device, "interrupt_id"): + plic_srcs.append(device.interrupt_id) + self.plic.n_src = max(plic_srcs) + 1 + def setNumCores(self, num_cpu): """Sets the PLIC and CLINT to have the right number of threads and contexts. Assumes that the cores have a single hardware thread. 
diff --git a/src/dev/riscv/SConscript b/src/dev/riscv/SConscript index 6e3376bb02..be5ff8defe 100755 --- a/src/dev/riscv/SConscript +++ b/src/dev/riscv/SConscript @@ -30,7 +30,7 @@ Import('*') -SimObject('HiFive.py', sim_objects=['HiFive', 'GenericRiscvPciHost'], +SimObject('HiFive.py', sim_objects=['HiFiveBase', 'GenericRiscvPciHost'], tags='riscv isa') SimObject('LupV.py', sim_objects=['LupV'], tags='riscv isa') SimObject('Clint.py', sim_objects=['Clint'], tags='riscv isa') diff --git a/src/dev/riscv/hifive.cc b/src/dev/riscv/hifive.cc index 74ae346f1f..0487eabba2 100644 --- a/src/dev/riscv/hifive.cc +++ b/src/dev/riscv/hifive.cc @@ -1,5 +1,6 @@ /* * Copyright (c) 2021 Huawei International + * Copyright (c) 2023 Google LLC * All rights reserved * * The license below extends only to copyright in the software and shall @@ -39,7 +40,7 @@ #include "dev/riscv/clint.hh" #include "dev/riscv/plic.hh" -#include "params/HiFive.hh" +#include "params/HiFiveBase.hh" #include "sim/system.hh" namespace gem5 @@ -47,44 +48,46 @@ namespace gem5 using namespace RiscvISA; -HiFive::HiFive(const Params &params) : +HiFiveBase::HiFiveBase(const Params &params) : Platform(params), clint(params.clint), plic(params.plic), uartIntID(params.uart_int_id) { + fatal_if(clint == nullptr, "CLINT should not be NULL"); + fatal_if(plic == nullptr, "PLIC should not be NULL"); } void -HiFive::postConsoleInt() +HiFiveBase::postConsoleInt() { plic->post(uartIntID); } void -HiFive::clearConsoleInt() +HiFiveBase::clearConsoleInt() { plic->clear(uartIntID); } void -HiFive::postPciInt(int line) +HiFiveBase::postPciInt(int line) { plic->post(line); } void -HiFive::clearPciInt(int line) +HiFiveBase::clearPciInt(int line) { plic->clear(line); } void -HiFive::serialize(CheckpointOut &cp) const +HiFiveBase::serialize(CheckpointOut &cp) const { } void -HiFive::unserialize(CheckpointIn &cp) +HiFiveBase::unserialize(CheckpointIn &cp) { } diff --git a/src/dev/riscv/hifive.hh b/src/dev/riscv/hifive.hh index 
78d45046b1..99d7ae67b3 100644 --- a/src/dev/riscv/hifive.hh +++ b/src/dev/riscv/hifive.hh @@ -1,5 +1,6 @@ /* * Copyright (c) 2021 Huawei International + * Copyright (c) 2023 Google LLC * All rights reserved * * The license below extends only to copyright in the software and shall @@ -41,23 +42,23 @@ #include "dev/platform.hh" #include "dev/riscv/clint.hh" #include "dev/riscv/plic.hh" -#include "params/HiFive.hh" +#include "params/HiFiveBase.hh" namespace gem5 { using namespace RiscvISA; -class HiFive : public Platform +class HiFiveBase : public Platform { public: Clint *clint; - Plic *plic; + PlicBase *plic; int uartIntID; public: - typedef HiFiveParams Params; - HiFive(const Params &params); + typedef HiFiveBaseParams Params; + HiFiveBase(const Params &params); void postConsoleInt() override; From 75001363923d0f64a8b1454890db575e7c7ca099 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 2 Mar 2023 09:25:17 -0800 Subject: [PATCH 232/492] mem: Add a parameter which will make a memory truly a ROM. This piggy-backs on the writeOK method which already exists. It also modifies the flags returned as part of the memory's backdoor descriptor which doesn't enforce that the memory is read only, but will let the other party know it's expected not to write to it. 
Change-Id: Ib95e619c76c327d302e62a88515a92af11815981 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68557 Maintainer: Gabe Black Tested-by: kokoro Reviewed-by: Matthew Poremba --- src/mem/AbstractMemory.py | 2 ++ src/mem/abstract_mem.cc | 7 ++++--- src/mem/abstract_mem.hh | 9 ++++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/mem/AbstractMemory.py b/src/mem/AbstractMemory.py index ea88fd879c..7ab24bc118 100644 --- a/src/mem/AbstractMemory.py +++ b/src/mem/AbstractMemory.py @@ -74,3 +74,5 @@ class AbstractMemory(ClockedObject): image_file = Param.String( "", "Image to load into memory as its initial contents" ) + + writeable = Param.Bool(True, "Allow writes to this memory") diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc index 03f2557d63..9340f7e96f 100644 --- a/src/mem/abstract_mem.cc +++ b/src/mem/abstract_mem.cc @@ -59,10 +59,11 @@ namespace memory AbstractMemory::AbstractMemory(const Params &p) : ClockedObject(p), range(p.range), pmemAddr(NULL), backdoor(params().range, nullptr, - (MemBackdoor::Flags)(MemBackdoor::Readable | - MemBackdoor::Writeable)), + (MemBackdoor::Flags)(p.writeable ? 
+ MemBackdoor::Readable | MemBackdoor::Writeable : + MemBackdoor::Readable)), confTableReported(p.conf_table_reported), inAddrMap(p.in_addr_map), - kvmMap(p.kvm_map), _system(NULL), + kvmMap(p.kvm_map), writeable(p.writeable), _system(NULL), stats(*this) { panic_if(!range.valid() || !range.size(), diff --git a/src/mem/abstract_mem.hh b/src/mem/abstract_mem.hh index 53b794012d..7f12487421 100644 --- a/src/mem/abstract_mem.hh +++ b/src/mem/abstract_mem.hh @@ -129,6 +129,9 @@ class AbstractMemory : public ClockedObject // Should KVM map this memory for the guest const bool kvmMap; + // Are writes allowed to this memory + const bool writeable; + std::list lockedAddrList; // helper function for checkLockedAddrs(): we really want to @@ -149,8 +152,12 @@ class AbstractMemory : public ClockedObject // requesting execution context), 'true' otherwise. Note that // this method must be called on *all* stores since even // non-conditional stores must clear any matching lock addresses. - bool writeOK(PacketPtr pkt) { + bool + writeOK(PacketPtr pkt) + { const RequestPtr &req = pkt->req; + if (!writeable) + return false; if (lockedAddrList.empty()) { // no locked addrs: nothing to check, store_conditional fails bool isLLSC = pkt->isLLSC(); From e3f51e595c0a49d2d3fca5d720f350f50b89c3dd Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Thu, 2 Mar 2023 16:34:22 -0800 Subject: [PATCH 233/492] cpu: Allow PcCountTracker to compile in NULL ISA While the PcCountTracker isn't necessary in the NULL ISA, the structure of the standard library requires us to have it built when running the replacement policy tests, which should fix these tests failing within the nightlies at the moment. 
Change-Id: I225b7923f2a11d351c24bdceba3ded4ed2b3bc87 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68597 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/cpu/probes/SConscript | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/cpu/probes/SConscript b/src/cpu/probes/SConscript index c96ca78a0c..9f43317284 100644 --- a/src/cpu/probes/SConscript +++ b/src/cpu/probes/SConscript @@ -26,12 +26,11 @@ Import("*") -if not env["CONF"]["USE_NULL_ISA"]: - SimObject( - "PcCountTracker.py", - sim_objects=["PcCountTracker", "PcCountTrackerManager"], - ) - Source("pc_count_tracker.cc") - Source("pc_count_tracker_manager.cc") +SimObject( + "PcCountTracker.py", + sim_objects=["PcCountTracker", "PcCountTrackerManager"], +) +Source("pc_count_tracker.cc") +Source("pc_count_tracker_manager.cc") - DebugFlag("PcCountTracker") +DebugFlag("PcCountTracker") From 6884aeb86a73125969c293b9d3fedd242e104985 Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Tue, 28 Feb 2023 23:24:49 -0800 Subject: [PATCH 234/492] base: Fix gcc-13 build error This change adds relevant errors that allow building with gcc-13. 
Change-Id: Ib97a90ef647a9cd9ec1bf1f2bde61daca85de427 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68497 Maintainer: Bobby Bruce Tested-by: kokoro Reviewed-by: Bobby Bruce --- src/arch/arm/insts/crypto.hh | 2 ++ src/base/cprintf_formats.hh | 1 + src/mem/ruby/common/Histogram.hh | 1 + 3 files changed, 4 insertions(+) diff --git a/src/arch/arm/insts/crypto.hh b/src/arch/arm/insts/crypto.hh index 23eda5b8ab..089cbe5726 100644 --- a/src/arch/arm/insts/crypto.hh +++ b/src/arch/arm/insts/crypto.hh @@ -38,6 +38,8 @@ #ifndef __ARCH_ARM_INSTS_CRYPTO_HH__ #define __ARCH_ARM_INSTS_CRYPTO_HH__ +#include + namespace gem5 { diff --git a/src/base/cprintf_formats.hh b/src/base/cprintf_formats.hh index 02ba49699e..4a64780c4a 100644 --- a/src/base/cprintf_formats.hh +++ b/src/base/cprintf_formats.hh @@ -29,6 +29,7 @@ #ifndef __BASE_CPRINTF_FORMATS_HH__ #define __BASE_CPRINTF_FORMATS_HH__ +#include #include #include #include diff --git a/src/mem/ruby/common/Histogram.hh b/src/mem/ruby/common/Histogram.hh index bfd3ae0aba..cdc27af11f 100644 --- a/src/mem/ruby/common/Histogram.hh +++ b/src/mem/ruby/common/Histogram.hh @@ -29,6 +29,7 @@ #ifndef __MEM_RUBY_COMMON_HISTOGRAM_HH__ #define __MEM_RUBY_COMMON_HISTOGRAM_HH__ +#include #include #include From da050eeddea5b3281d63e4f647a19c80880ebeae Mon Sep 17 00:00:00 2001 From: handsomeliu Date: Fri, 3 Mar 2023 16:32:01 +0800 Subject: [PATCH 235/492] base: support calculating the intersection of two AddrRange Change-Id: I2f089039c709fe4c3f7086263fb56470c7713bad Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68617 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Bobby Bruce --- src/base/addr_range.hh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/base/addr_range.hh b/src/base/addr_range.hh index 07bd255d26..11fb1cd668 100644 --- a/src/base/addr_range.hh +++ b/src/base/addr_range.hh @@ -732,6 +732,22 @@ class AddrRange { return !(*this == r); 
} + + /** + * @ingroup api_addr_range + */ + AddrRange + operator&(const AddrRange& r) const + { + panic_if(this->interleaved() || r.interleaved(), + "Cannot calculate intersection of interleaved ranges."); + Addr start = std::max(this->_start, r._start); + Addr end = std::min(this->_end, r._end); + if (end <= start) { + return AddrRange(0, 0); + } + return AddrRange(start, end); + } }; static inline AddrRangeList From fd7006f4f1a34d6bbfe0c7d62be72ced43281462 Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Fri, 3 Mar 2023 17:24:58 -0800 Subject: [PATCH 236/492] arch-riscv: Revert CSR instruction fixes This reverts commit 4b1c24542065380c6cff7ab2baa25e216a0ad38e and commit 89c49d1ab06ea5364ab1f80586f8b01c0297cb12 because they are causing the RISC-V Ubuntu boot test within the nightly tests to hang and time out. Change-Id: Ia4d8098ec940cb5900256c8cede0146256c851e5 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68637 Tested-by: kokoro Reviewed-by: Roger Chang Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power --- src/arch/riscv/insts/standard.hh | 19 ++----------------- src/arch/riscv/isa/formats/standard.isa | 15 ++++++--------- 2 files changed, 8 insertions(+), 26 deletions(-) diff --git a/src/arch/riscv/insts/standard.hh b/src/arch/riscv/insts/standard.hh index 2dfe73aedf..5b0e8c2c22 100644 --- a/src/arch/riscv/insts/standard.hh +++ b/src/arch/riscv/insts/standard.hh @@ -91,33 +91,18 @@ class CSROp : public RiscvStaticInst protected: uint64_t csr; uint64_t uimm; - bool read; - bool write; /// Constructor CSROp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : RiscvStaticInst(mnem, _machInst, __opClass), - csr(FUNCT12), uimm(CSRIMM), read(true), write(true) + csr(FUNCT12), uimm(CSRIMM) { if (csr == CSR_SATP) { flags[IsSquashAfter] = true; } - if (strcmp(mnemonic, "csrrw") == 0 || - strcmp(mnemonic, "csrrwi") == 0) { - if (RD == 0){ - read = false; - } - } else if (strcmp(mnemonic, "csrrs") == 0 || - strcmp(mnemonic, "csrrc") 
== 0 || - strcmp(mnemonic, "csrrsi") == 0 || - strcmp(mnemonic, "csrrci") == 0 ){ - if (RS1 == 0 || uimm == 0) { - write = false; - } - } } - std::string generateDisassembly( + std::string generateDisassembly( Addr pc, const loader::SymbolTable *symtab) const override; }; diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa index c94a0bcdbd..bb500f5f49 100644 --- a/src/arch/riscv/isa/formats/standard.isa +++ b/src/arch/riscv/isa/formats/standard.isa @@ -358,7 +358,7 @@ def template CSRExecute {{ %(op_decl)s; %(op_rd)s; - RegVal data = 0, olddata = 0, nonmaskdata = 0; + RegVal data, olddata; auto lowestAllowedMode = (PrivilegeMode)bits(csr, 9, 8); auto pm = (PrivilegeMode)xc->readMiscReg(MISCREG_PRV); if (pm < lowestAllowedMode) { @@ -380,13 +380,11 @@ def template CSRExecute {{ break; } - if (read) { - if (csr == CSR_FCSR) { + if (csr == CSR_FCSR) { olddata = xc->readMiscReg(MISCREG_FFLAGS) | - (xc->readMiscReg(MISCREG_FRM) << FRM_OFFSET); - } else { + (xc->readMiscReg(MISCREG_FRM) << FRM_OFFSET); + } else { olddata = xc->readMiscReg(midx); - } } olddata = rvZext(olddata); auto olddata_all = olddata; @@ -397,9 +395,8 @@ def template CSRExecute {{ %(code)s; - nonmaskdata = data & ~maskVal; data &= maskVal; - if (write) { + if (data != olddata) { if (bits(csr, 11, 10) == 0x3) { return std::make_shared( csprintf("CSR %s is read-only\n", csrName), machInst); @@ -420,7 +417,7 @@ def template CSRExecute {{ case CSR_SIP: case CSR_SIE: case CSR_UIP: case CSR_UIE: case CSR_MSTATUS: case CSR_SSTATUS: case CSR_USTATUS: - if (nonmaskdata == 0) { + if (newdata_all != olddata_all) { xc->setMiscReg(midx, newdata_all); } else { return std::make_shared( From 8a11b39c41353ec5345250a0ca370b89d14e97bd Mon Sep 17 00:00:00 2001 From: Jasjeet Rangi Date: Wed, 23 Nov 2022 12:31:12 -0800 Subject: [PATCH 237/492] cpu: Move fetch stats from simple and minor to base This summarizes a series of changes to move general Simple, Minor, O3 CPU stats to 
BaseCPU. This commit focuses on moving numBranches from SimpleCPU to the FetchCPUStats in the BaseCPU, and numFetchSuspends from MinorCPU into FetchCPUStats. More general information about this relation chain is below. 1. Summary: Moved general CPU stats found across Simple, Minor, and O3 CPU models into BaseCPU through new stat groups. The stat groups are FetchCPUStats, ExecuteCPUStats, and CommitCPUStats. Implemented the committedControl stat vector found in MinorCPU for Simple and O3 CPU. Implemented the numStoreInsts stat found in SimpleCPU for O3CPU. IPC and CPI stats are now tracked at the core and thread level in BaseCPU and are made universal for simple, minor, o3, and kvm CPUs. Duplicate stats across the models are merged into a single stat in BaseCPU under the same stat name. This change does not implement every general level stat moved to BaseCPU for every model. 2. Stat API Changes a. SimpleCPU: statExecutedInstType vector unified into committedInstType numCondCtrlInsts unified into committedControl::isControl b. O3CPU: i. Fetch Stage branches in fetch unified into numBranches rate renamed to fetchRate insts unified into numInsts ii. Execute Stage Regfile stats unified into base with use of Simple's stat naming numRefs in IEW unified into numMemRefs numRate from IEW renamed to instRate iii. Commit Stage committedInsts is renamed to numInstsNotNOP committedOps is renamed to numOpsNotNOP instsCommitted is unified into numInsts opsCommitted is unified into numOps branches is unified into committedControl::isControl floating is unified into numFpInsts integer is unified into numIntInsts loads is unified into numLoadInsts memRefs is renamed to numMemRefs vectorInstructions is unified into numVecInsts 3. Details: Created three stat groups in BaseCPU. FetchCPUStats track statistics related to the fetch stage. ExecuteCPUStats track statistics related to the execute stage. CommitCPUStats track statistics related to the commit stage.
There are three vectors in Base that store unique pointers to per thread instances of these stat groups. The stat group pointer for thread i is accessible at index i of one of these vectors. For example, stat numCCRegReads of the execute stage for thread 0 can be accessed with executeStats[0]->numCCRegReads. The stats.txt output will print the thread ID of the stat group. For example, numVecRegReads on thread 0 of a single core prints as "board.processor.cores.core.executeStats0.numVecRegReads". NOTE: Multithreading in gem5 is untested. Therefore per thread stats output in stats.txt is not currently guaranteed to be correctly formatted. For FetchCPUStats, the stats moved from SimpleCPU are numBranches and numInsts. From MinorCPU, the stat moved is numFetchSuspends. From O3CPU, the stats moved are from the O3 fetch stage: Stat branches is unified into numBranches, stat rate is renamed to fetchRate in Base, stat insts is unified into numInsts, stat icacheStallCycles keeps the same name in Base. For ExecuteCPUStats, the stats moved from SimpleCPU are dcacheStallCycles, numCCRegReads, numCCRegWrites, numFpAluAccesses, numFpRegReads, numFpRegWrites, numIntAluAccesses, numIntRegReads, numIntRegWrites, numMemRefs, numMiscRegReads, numMiscRegWrites, numVecAluAccesses, numVecPredRegReads, numVecPredRegWrites, numVecRegReads, numVecRegWrites. The stat moved from MinorCPU is numDiscardedOps. From O3, the Regfile stats in CPU are unified into the reg stats in Base and use the names found originally in SimpleCPU. From O3 IEW stage, numInsts keeps the same name in Base, numBranches is unified into numBranches in base, numNop keeps the same name in Base, numRefs is unified into numMemRefs in Base, numLoadInsts and numStoreInsts are moved into Base, numRate is renamed to instRate in base. For CommitCPUStats, the stats moved from SimpleCPU are numCondCtrlInsts, numFpInsts, numIntInsts, numLoadInsts, numStoreInsts, numVecInsts. 
The stats moved from MinorCPU are numInsts, committedInstType, and committedControl. statExecutedInstType of SimpleCPU is unified with committedInstType of MinorCPU. Implemented committedControl stats from MinorCPU in Simple and O3 CPU. In MinorCPU, this stat was a 2D vector, where the first dimension is the thread ID. In base it is now a 1D vector that is tied to a thread ID via the commitStats vector that the object is accessible through. From the O3 commit stage, committedInsts is renamed to numInstsNotNOP, committedOps is renamed to numOpsNotNOP, instsCommitted is unified into numInsts, opsCommitted is renamed to numOps, committedInstType is unified into committedInstType from Minor, branches is removed because it duplicates committedControl::IsControl, floating is unified into numFpInsts, integer is unified into numIntInsts, loads is unified into numLoadInsts, numStoreInsts is implemented for tracking in O3, memRefs is renamed to numMemRefs, vectorInstructions is unified into numVecInsts. Note that numCondCtrlInsts of Simple is unified into committedControl::IsCondCtrl. Implemented IPC and CPI tracking inside BaseCPU. In BaseCPU::BaseCPUStats, numInsts and numOps track per CPU core committed instructions and operations. In BaseCPU::FetchCPUStats, numInsts and numOps track per thread fetched instructions and operations. In BaseCPU::ExecuteCPUStats, numInsts tracks per thread executed instructions. In BaseCPU::CommitCPUStats, numInsts and numOps track per thread committed instructions and operations. In BaseSimpleCPU, the countInst() function has been split into countInst(), countFetchInst(), and countCommitInst(). The stat count incrementation step of countInst() has been removed and delegated to the other two functions. countFetchInst() increments numInsts and numOps of the FetchCPUStats group for a thread. countCommitInst() increments the numInsts and numOps of the CommitCPUStats group for a thread and of the BaseCPUStats group for a CPU core.
These functions are called in the appropriate stage within timing.cc and atomic.cc. The call to countInst() is left unchanged. countFetchInst() is called in preExecute(). countCommitInst() is called in postExecute(). For MinorCPU, only the commit level numInsts and numOps stats have been implemented. IPC and CPI stats have been added to BaseCPUStats (core level) and CommitCPUStats (thread level). The formulas for the IPC and CPI stats in CommitCPUStats are set in the BaseCPU constructor, after the CommitCPUStats stat group object has been created. These replace IPC, CPI, totalIpc, and totalCpi stats in O3. Replaced committedInsts stats of KVM CPU with commitStats.numInsts of BaseCPU. This results in IPC and CPI printing in stats.txt for KVM simulations. This change does not implement most general stats found in one or two model for all others. Jira Ticket: https://gem5.atlassian.net/browse/GEM5-1304 Change-Id: I3c852f8dba3268c71b7a3415480fb63d8dc30cb7 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66031 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- src/cpu/base.cc | 19 +++++++++++++++++++ src/cpu/base.hh | 16 ++++++++++++++++ src/cpu/minor/execute.cc | 2 +- src/cpu/minor/stats.cc | 2 -- src/cpu/minor/stats.hh | 3 --- src/cpu/simple/base.cc | 2 +- src/cpu/simple/exec_context.hh | 7 ------- 7 files changed, 37 insertions(+), 14 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index d2c0a78d44..1d293397e5 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -191,6 +191,11 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) modelResetPort.onChange([this](const bool &new_val) { setReset(new_val); }); + // create a stat group object for each thread on this core + fetchStats.reserve(numThreads); + for (int i = 0; i < numThreads; i++) { + fetchStats.emplace_back(new FetchCPUStats(this, i)); + } } void @@ -827,4 +832,18 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent) hostOpRate = simOps / hostSeconds; 
} +BaseCPU:: +FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id) + : statistics::Group(parent, csprintf("fetchStats%i", thread_id).c_str()), + ADD_STAT(numBranches, statistics::units::Count::get(), + "Number of branches fetched"), + ADD_STAT(numFetchSuspends, statistics::units::Count::get(), + "Number of times Execute suspended instruction fetching") + +{ + numBranches + .prereq(numBranches); + +} + } // namespace gem5 diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 084d9b9305..d6e5d38838 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -43,6 +43,7 @@ #define __CPU_BASE_HH__ #include +#include #include "arch/generic/interrupts.hh" #include "base/statistics.hh" @@ -676,6 +677,21 @@ class BaseCPU : public ClockedObject const Cycles pwrGatingLatency; const bool powerGatingOnIdle; EventFunctionWrapper enterPwrGatingEvent; + + public: + struct FetchCPUStats : public statistics::Group + { + FetchCPUStats(statistics::Group *parent, int thread_id); + + /* Total number of branches fetched */ + statistics::Scalar numBranches; + + /* Number of times fetch was asked to suspend by Execute */ + statistics::Scalar numFetchSuspends; + + }; + + std::vector> fetchStats; }; } // namespace gem5 diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index 5eaaf5804e..323ae2982b 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -1054,7 +1054,7 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue, DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute" " inst: %s\n", thread_id, *inst); - cpu.stats.numFetchSuspends++; + cpu.fetchStats[thread_id]->numFetchSuspends++; updateBranchData(thread_id, BranchData::SuspendThread, inst, resume_pc, branch); diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc index 64d4c475e0..e9ca562c16 100644 --- a/src/cpu/minor/stats.cc +++ b/src/cpu/minor/stats.cc @@ -52,8 +52,6 @@ MinorStats::MinorStats(BaseCPU *base_cpu) ADD_STAT(numDiscardedOps, 
statistics::units::Count::get(), "Number of ops (including micro ops) which were discarded before " "commit"), - ADD_STAT(numFetchSuspends, statistics::units::Count::get(), - "Number of times Execute suspended instruction fetching"), ADD_STAT(quiesceCycles, statistics::units::Cycle::get(), "Total number of cycles that CPU has spent quiesced or waiting " "for an interrupt"), diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh index 1ab81f4407..524d20f85d 100644 --- a/src/cpu/minor/stats.hh +++ b/src/cpu/minor/stats.hh @@ -68,9 +68,6 @@ struct MinorStats : public statistics::Group /** Number of ops discarded before committing */ statistics::Scalar numDiscardedOps; - /** Number of times fetch was asked to suspend by Execute */ - statistics::Scalar numFetchSuspends; - /** Number of cycles in quiescent state */ statistics::Scalar quiesceCycles; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 768f63ede5..b2a11fd84b 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -396,7 +396,7 @@ BaseSimpleCPU::postExecute() } if (curStaticInst->isControl()) { - ++t_info.execContextStats.numBranches; + ++fetchStats[t_info.thread->threadId()]->numBranches; } /* Power model statistics */ diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index 0f20763f28..d4bb017481 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -152,8 +152,6 @@ class SimpleExecContext : public ExecContext "ICache total stall cycles"), ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(), "DCache total stall cycles"), - ADD_STAT(numBranches, statistics::units::Count::get(), - "Number of branches fetched"), ADD_STAT(numPredictedBranches, statistics::units::Count::get(), "Number of branches predicted as taken"), ADD_STAT(numBranchMispred, statistics::units::Count::get(), @@ -203,9 +201,6 @@ class SimpleExecContext : public ExecContext numIdleCycles = idleFraction * cpu->baseStats.numCycles; numBusyCycles = 
notIdleFraction * cpu->baseStats.numCycles; - numBranches - .prereq(numBranches); - numPredictedBranches .prereq(numPredictedBranches); @@ -297,8 +292,6 @@ class SimpleExecContext : public ExecContext statistics::Scalar dcacheStallCycles; /// @{ - /// Total number of branches fetched - statistics::Scalar numBranches; /// Number of branches predicted as taken statistics::Scalar numPredictedBranches; /// Number of misprediced branches From fd2d80baa39645842985a489edd20e0fab15b9d1 Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Thu, 19 Jan 2023 00:25:26 -0800 Subject: [PATCH 238/492] cpu: Move execute stats from simple and minor to base Created stat group ExecuteCPUStats in BaseCPU and moved stats from the simple and minor cpu models. The stats moved from SimpleCPU are dcacheStallCycles, icacheStallCycles, numCCRegReads, numCCRegWrites, numFpAluAccesses, numFpRegReads, numFpRegWrites, numIntAluAccesses, numIntRegReads, numIntRegWrites, numMemRefs, numMiscRegReads, numMiscRegWrites, numVecAluAccesses, numVecPredRegReads, numVecPredRegWrites, numVecRegReads, numVecRegWrites. The stat moved from MinorCPU is numDiscardedOps. Also, ccRegfileReads, ccRegfileWrites, fpRegfileReads, fpRegfileWrites, intRegfileReads, intRegfileWrites, miscRegfileReads, miscRegfileWrites, vecPredRegfileReads, vecPredRegfileWrites, vecRegfileReads, and vecRegfileWrites are removed from cpu.hh and cpu.cc in O3CPU. The corresponding stats in BaseCPU::ExecuteCPUStats are used instead. Changed the getReg, getWritableReg, and setReg functions in the O3 CPU object to take the thread ID as a parameter. This is because the stats in base are stored in vectors that are indexed by thread ID. 
Change-Id: I801c5ceb4c70b7b281127569f11c6ee98f614b27 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67390 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- src/cpu/base.cc | 76 ++++++++++++++++++++ src/cpu/base.hh | 48 +++++++++++++ src/cpu/minor/execute.cc | 2 +- src/cpu/minor/stats.cc | 3 - src/cpu/minor/stats.hh | 3 - src/cpu/o3/cpu.cc | 120 ++++++++----------------------- src/cpu/o3/cpu.hh | 28 ++------ src/cpu/o3/dyn_inst.hh | 14 ++-- src/cpu/simple/base.cc | 8 +-- src/cpu/simple/exec_context.hh | 125 +++++---------------------------- 10 files changed, 190 insertions(+), 237 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 1d293397e5..b10c731e17 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -193,8 +193,10 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) }); // create a stat group object for each thread on this core fetchStats.reserve(numThreads); + executeStats.reserve(numThreads); for (int i = 0; i < numThreads; i++) { fetchStats.emplace_back(new FetchCPUStats(this, i)); + executeStats.emplace_back(new ExecuteCPUStats(this, i)); } } @@ -846,4 +848,78 @@ FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id) } +// means it is incremented in a vector indexing and not directly +BaseCPU:: +ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id) + : statistics::Group(parent, csprintf("executeStats%i", thread_id).c_str()), + ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(), + "DCache total stall cycles"), + ADD_STAT(numCCRegReads, statistics::units::Count::get(), + "Number of times the CC registers were read"), + ADD_STAT(numCCRegWrites, statistics::units::Count::get(), + "Number of times the CC registers were written"), + ADD_STAT(numFpAluAccesses, statistics::units::Count::get(), + "Number of float alu accesses"), + ADD_STAT(numFpRegReads, statistics::units::Count::get(), + "Number of times the floating registers were read"), + 
ADD_STAT(numFpRegWrites, statistics::units::Count::get(), + "Number of times the floating registers were written"), + ADD_STAT(numIntAluAccesses, statistics::units::Count::get(), + "Number of integer alu accesses"), + ADD_STAT(numIntRegReads, statistics::units::Count::get(), + "Number of times the integer registers were read"), + ADD_STAT(numIntRegWrites, statistics::units::Count::get(), + "Number of times the integer registers were written"), + ADD_STAT(numMemRefs, statistics::units::Count::get(), + "Number of memory refs"), + ADD_STAT(numMiscRegReads, statistics::units::Count::get(), + "Number of times the Misc registers were read"), + ADD_STAT(numMiscRegWrites, statistics::units::Count::get(), + "Number of times the Misc registers were written"), + ADD_STAT(numVecAluAccesses, statistics::units::Count::get(), + "Number of vector alu accesses"), + ADD_STAT(numVecPredRegReads, statistics::units::Count::get(), + "Number of times the predicate registers were read"), + ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(), + "Number of times the predicate registers were written"), + ADD_STAT(numVecRegReads, statistics::units::Count::get(), + "Number of times the vector registers were read"), + ADD_STAT(numVecRegWrites, statistics::units::Count::get(), + "Number of times the vector registers were written"), + ADD_STAT(numDiscardedOps, statistics::units::Count::get(), + "Number of ops (including micro ops) which were discarded before " + "commit") +{ + dcacheStallCycles + .prereq(dcacheStallCycles); + numCCRegReads + .prereq(numCCRegReads) + .flags(statistics::nozero); + numCCRegWrites + .prereq(numCCRegWrites) + .flags(statistics::nozero); + numFpAluAccesses + .prereq(numFpAluAccesses); + numFpRegReads + .prereq(numFpRegReads); + numIntAluAccesses + .prereq(numIntAluAccesses); + numIntRegReads + .prereq(numIntRegReads); + numIntRegWrites + .prereq(numIntRegWrites); + numMiscRegReads + .prereq(numMiscRegReads); + numMiscRegWrites + .prereq(numMiscRegWrites); + 
numVecPredRegReads + .prereq(numVecPredRegReads); + numVecPredRegWrites + .prereq(numVecPredRegWrites); + numVecRegReads + .prereq(numVecRegReads); + numVecRegWrites + .prereq(numVecRegWrites); +} + } // namespace gem5 diff --git a/src/cpu/base.hh b/src/cpu/base.hh index d6e5d38838..ad6fa469a3 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -691,7 +691,55 @@ class BaseCPU : public ClockedObject }; + struct ExecuteCPUStats: public statistics::Group + { + ExecuteCPUStats(statistics::Group *parent, int thread_id); + + /* Number of cycles stalled for D-cache responses */ + statistics::Scalar dcacheStallCycles; + + /* Number of condition code register file accesses */ + statistics::Scalar numCCRegReads; + statistics::Scalar numCCRegWrites; + + /* number of float alu accesses */ + statistics::Scalar numFpAluAccesses; + + /* Number of float register file accesses */ + statistics::Scalar numFpRegReads; + statistics::Scalar numFpRegWrites; + + /* Number of integer alu accesses */ + statistics::Scalar numIntAluAccesses; + + /* Number of integer register file accesses */ + statistics::Scalar numIntRegReads; + statistics::Scalar numIntRegWrites; + + /* number of simulated memory references */ + statistics::Scalar numMemRefs; + + /* Number of misc register file accesses */ + statistics::Scalar numMiscRegReads; + statistics::Scalar numMiscRegWrites; + + /* Number of vector alu accesses */ + statistics::Scalar numVecAluAccesses; + + /* Number of predicate register file accesses */ + mutable statistics::Scalar numVecPredRegReads; + statistics::Scalar numVecPredRegWrites; + + /* Number of vector register file accesses */ + mutable statistics::Scalar numVecRegReads; + statistics::Scalar numVecRegWrites; + + /* Number of ops discarded before committing */ + statistics::Scalar numDiscardedOps; + }; + std::vector> fetchStats; + std::vector> executeStats; }; } // namespace gem5 diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index 323ae2982b..d657de5225 100644 --- 
a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -1368,7 +1368,7 @@ Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard, *inst, ex_info.streamSeqNum); if (fault == NoFault) - cpu.stats.numDiscardedOps++; + cpu.executeStats[thread_id]->numDiscardedOps++; } /* Mark the mem inst as being in the LSQ */ diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc index e9ca562c16..10e7573afd 100644 --- a/src/cpu/minor/stats.cc +++ b/src/cpu/minor/stats.cc @@ -49,9 +49,6 @@ MinorStats::MinorStats(BaseCPU *base_cpu) "Number of instructions committed"), ADD_STAT(numOps, statistics::units::Count::get(), "Number of ops (including micro ops) committed"), - ADD_STAT(numDiscardedOps, statistics::units::Count::get(), - "Number of ops (including micro ops) which were discarded before " - "commit"), ADD_STAT(quiesceCycles, statistics::units::Cycle::get(), "Total number of cycles that CPU has spent quiesced or waiting " "for an interrupt"), diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh index 524d20f85d..e5d018679d 100644 --- a/src/cpu/minor/stats.hh +++ b/src/cpu/minor/stats.hh @@ -65,9 +65,6 @@ struct MinorStats : public statistics::Group /** Number of simulated insts and microops */ statistics::Scalar numOps; - /** Number of ops discarded before committing */ - statistics::Scalar numDiscardedOps; - /** Number of cycles in quiescent state */ statistics::Scalar quiesceCycles; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index d2bacaa523..90df3b349e 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -344,31 +344,7 @@ CPU::CPUStats::CPUStats(CPU *cpu) "IPC: Instructions Per Cycle"), ADD_STAT(totalIpc, statistics::units::Rate< statistics::units::Count, statistics::units::Cycle>::get(), - "IPC: Total IPC of All Threads"), - ADD_STAT(intRegfileReads, statistics::units::Count::get(), - "Number of integer regfile reads"), - ADD_STAT(intRegfileWrites, statistics::units::Count::get(), - "Number of integer regfile writes"), - 
ADD_STAT(fpRegfileReads, statistics::units::Count::get(), - "Number of floating regfile reads"), - ADD_STAT(fpRegfileWrites, statistics::units::Count::get(), - "Number of floating regfile writes"), - ADD_STAT(vecRegfileReads, statistics::units::Count::get(), - "number of vector regfile reads"), - ADD_STAT(vecRegfileWrites, statistics::units::Count::get(), - "number of vector regfile writes"), - ADD_STAT(vecPredRegfileReads, statistics::units::Count::get(), - "number of predicate regfile reads"), - ADD_STAT(vecPredRegfileWrites, statistics::units::Count::get(), - "number of predicate regfile writes"), - ADD_STAT(ccRegfileReads, statistics::units::Count::get(), - "number of cc regfile reads"), - ADD_STAT(ccRegfileWrites, statistics::units::Count::get(), - "number of cc regfile writes"), - ADD_STAT(miscRegfileReads, statistics::units::Count::get(), - "number of misc regfile reads"), - ADD_STAT(miscRegfileWrites, statistics::units::Count::get(), - "number of misc regfile writes") + "IPC: Total IPC of All Threads") { // Register any of the O3CPU's stats here. 
timesIdled @@ -407,42 +383,6 @@ CPU::CPUStats::CPUStats(CPU *cpu) totalIpc .precision(6); totalIpc = sum(committedInsts) / cpu->baseStats.numCycles; - - intRegfileReads - .prereq(intRegfileReads); - - intRegfileWrites - .prereq(intRegfileWrites); - - fpRegfileReads - .prereq(fpRegfileReads); - - fpRegfileWrites - .prereq(fpRegfileWrites); - - vecRegfileReads - .prereq(vecRegfileReads); - - vecRegfileWrites - .prereq(vecRegfileWrites); - - vecPredRegfileReads - .prereq(vecPredRegfileReads); - - vecPredRegfileWrites - .prereq(vecPredRegfileWrites); - - ccRegfileReads - .prereq(ccRegfileReads); - - ccRegfileWrites - .prereq(ccRegfileWrites); - - miscRegfileReads - .prereq(miscRegfileReads); - - miscRegfileWrites - .prereq(miscRegfileWrites); } void @@ -1019,7 +959,7 @@ CPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) const RegVal CPU::readMiscReg(int misc_reg, ThreadID tid) { - cpuStats.miscRegfileReads++; + executeStats[tid]->numMiscRegReads++; return isa[tid]->readMiscReg(misc_reg); } @@ -1032,29 +972,29 @@ CPU::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid) void CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid) { - cpuStats.miscRegfileWrites++; + executeStats[tid]->numMiscRegWrites++; isa[tid]->setMiscReg(misc_reg, val); } RegVal -CPU::getReg(PhysRegIdPtr phys_reg) +CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid) { switch (phys_reg->classValue()) { case IntRegClass: - cpuStats.intRegfileReads++; + executeStats[tid]->numIntRegReads++; break; case FloatRegClass: - cpuStats.fpRegfileReads++; + executeStats[tid]->numFpRegReads++; break; case CCRegClass: - cpuStats.ccRegfileReads++; + executeStats[tid]->numCCRegReads++; break; case VecRegClass: case VecElemClass: - cpuStats.vecRegfileReads++; + executeStats[tid]->numVecRegReads++; break; case VecPredRegClass: - cpuStats.vecPredRegfileReads++; + executeStats[tid]->numVecPredRegReads++; break; default: break; @@ -1063,24 +1003,24 @@ CPU::getReg(PhysRegIdPtr phys_reg) } void -CPU::getReg(PhysRegIdPtr 
phys_reg, void *val) +CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid) { switch (phys_reg->classValue()) { case IntRegClass: - cpuStats.intRegfileReads++; + executeStats[tid]->numIntRegReads++; break; case FloatRegClass: - cpuStats.fpRegfileReads++; + executeStats[tid]->numFpRegReads++; break; case CCRegClass: - cpuStats.ccRegfileReads++; + executeStats[tid]->numCCRegReads++; break; case VecRegClass: case VecElemClass: - cpuStats.vecRegfileReads++; + executeStats[tid]->numVecRegReads++; break; case VecPredRegClass: - cpuStats.vecPredRegfileReads++; + executeStats[tid]->numVecPredRegReads++; break; default: break; @@ -1089,14 +1029,14 @@ CPU::getReg(PhysRegIdPtr phys_reg, void *val) } void * -CPU::getWritableReg(PhysRegIdPtr phys_reg) +CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid) { switch (phys_reg->classValue()) { case VecRegClass: - cpuStats.vecRegfileReads++; + executeStats[tid]->numVecRegReads++; break; case VecPredRegClass: - cpuStats.vecPredRegfileReads++; + executeStats[tid]->numVecPredRegReads++; break; default: break; @@ -1105,24 +1045,24 @@ CPU::getWritableReg(PhysRegIdPtr phys_reg) } void -CPU::setReg(PhysRegIdPtr phys_reg, RegVal val) +CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid) { switch (phys_reg->classValue()) { case IntRegClass: - cpuStats.intRegfileWrites++; + executeStats[tid]->numIntRegWrites++; break; case FloatRegClass: - cpuStats.fpRegfileWrites++; + executeStats[tid]->numFpRegWrites++; break; case CCRegClass: - cpuStats.ccRegfileWrites++; + executeStats[tid]->numCCRegWrites++; break; case VecRegClass: case VecElemClass: - cpuStats.vecRegfileWrites++; + executeStats[tid]->numVecRegWrites++; break; case VecPredRegClass: - cpuStats.vecPredRegfileWrites++; + executeStats[tid]->numVecPredRegWrites++; break; default: break; @@ -1131,24 +1071,24 @@ CPU::setReg(PhysRegIdPtr phys_reg, RegVal val) } void -CPU::setReg(PhysRegIdPtr phys_reg, const void *val) +CPU::setReg(PhysRegIdPtr phys_reg, const void *val, 
ThreadID tid) { switch (phys_reg->classValue()) { case IntRegClass: - cpuStats.intRegfileWrites++; + executeStats[tid]->numIntRegWrites++; break; case FloatRegClass: - cpuStats.fpRegfileWrites++; + executeStats[tid]->numFpRegWrites++; break; case CCRegClass: - cpuStats.ccRegfileWrites++; + executeStats[tid]->numCCRegWrites++; break; case VecRegClass: case VecElemClass: - cpuStats.vecRegfileWrites++; + executeStats[tid]->numVecRegWrites++; break; case VecPredRegClass: - cpuStats.vecPredRegfileWrites++; + executeStats[tid]->numVecPredRegWrites++; break; default: break; diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 08a1312e73..07775298af 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -310,12 +310,12 @@ class CPU : public BaseCPU */ void setMiscReg(int misc_reg, RegVal val, ThreadID tid); - RegVal getReg(PhysRegIdPtr phys_reg); - void getReg(PhysRegIdPtr phys_reg, void *val); - void *getWritableReg(PhysRegIdPtr phys_reg); + RegVal getReg(PhysRegIdPtr phys_reg, ThreadID tid); + void getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid); + void *getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid); - void setReg(PhysRegIdPtr phys_reg, RegVal val); - void setReg(PhysRegIdPtr phys_reg, const void *val); + void setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid); + void setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid); /** Architectural register accessors. Looks up in the commit * rename table to obtain the true physical index of the @@ -595,24 +595,6 @@ class CPU : public BaseCPU /** Stat for the total IPC. 
*/ statistics::Formula totalIpc; - //number of integer register file accesses - statistics::Scalar intRegfileReads; - statistics::Scalar intRegfileWrites; - //number of float register file accesses - statistics::Scalar fpRegfileReads; - statistics::Scalar fpRegfileWrites; - //number of vector register file accesses - mutable statistics::Scalar vecRegfileReads; - statistics::Scalar vecRegfileWrites; - //number of predicate register file accesses - mutable statistics::Scalar vecPredRegfileReads; - statistics::Scalar vecPredRegfileWrites; - //number of CC register file accesses - statistics::Scalar ccRegfileReads; - statistics::Scalar ccRegfileWrites; - //number of misc - statistics::Scalar miscRegfileReads; - statistics::Scalar miscRegfileWrites; } cpuStats; public: diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index 54c0385374..c759c5eb38 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -1086,10 +1086,10 @@ class DynInst : public ExecContext, public RefCounted if (bytes == sizeof(RegVal)) { setRegOperand(staticInst.get(), idx, - cpu->getReg(prev_phys_reg)); + cpu->getReg(prev_phys_reg, threadNumber)); } else { uint8_t val[original_dest_reg.regClass().regBytes()]; - cpu->getReg(prev_phys_reg, val); + cpu->getReg(prev_phys_reg, val, threadNumber); setRegOperand(staticInst.get(), idx, val); } } @@ -1116,7 +1116,7 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedSrcIdx(idx); if (reg->is(InvalidRegClass)) return 0; - return cpu->getReg(reg); + return cpu->getReg(reg, threadNumber); } void @@ -1125,13 +1125,13 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedSrcIdx(idx); if (reg->is(InvalidRegClass)) return; - cpu->getReg(reg, val); + cpu->getReg(reg, val, threadNumber); } void * getWritableRegOperand(const StaticInst *si, int idx) override { - return cpu->getWritableReg(renamedDestIdx(idx)); + return cpu->getWritableReg(renamedDestIdx(idx), threadNumber); } 
/** @todo: Make results into arrays so they can handle multiple dest @@ -1143,7 +1143,7 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedDestIdx(idx); if (reg->is(InvalidRegClass)) return; - cpu->setReg(reg, val); + cpu->setReg(reg, val, threadNumber); setResult(reg->regClass(), val); } @@ -1153,7 +1153,7 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedDestIdx(idx); if (reg->is(InvalidRegClass)) return; - cpu->setReg(reg, val); + cpu->setReg(reg, val, threadNumber); setResult(reg->regClass(), val); } }; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index b2a11fd84b..c8d9aeeb86 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -388,7 +388,7 @@ BaseSimpleCPU::postExecute() Addr instAddr = threadContexts[curThread]->pcState().instAddr(); if (curStaticInst->isMemRef()) { - t_info.execContextStats.numMemRefs++; + executeStats[t_info.thread->threadId()]->numMemRefs++; } if (curStaticInst->isLoad()) { @@ -402,19 +402,19 @@ BaseSimpleCPU::postExecute() /* Power model statistics */ //integer alu accesses if (curStaticInst->isInteger()){ - t_info.execContextStats.numIntAluAccesses++; + executeStats[t_info.thread->threadId()]->numIntAluAccesses++; t_info.execContextStats.numIntInsts++; } //float alu accesses if (curStaticInst->isFloating()){ - t_info.execContextStats.numFpAluAccesses++; + executeStats[t_info.thread->threadId()]->numFpAluAccesses++; t_info.execContextStats.numFpInsts++; } //vector alu accesses if (curStaticInst->isVector()){ - t_info.execContextStats.numVecAluAccesses++; + executeStats[t_info.thread->threadId()]->numVecAluAccesses++; t_info.execContextStats.numVecInsts++; } diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index d4bb017481..00efd8593c 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -90,12 +90,6 @@ class SimpleExecContext : public ExecContext "Number of instructions 
committed"), ADD_STAT(numOps, statistics::units::Count::get(), "Number of ops (including micro ops) committed"), - ADD_STAT(numIntAluAccesses, statistics::units::Count::get(), - "Number of integer alu accesses"), - ADD_STAT(numFpAluAccesses, statistics::units::Count::get(), - "Number of float alu accesses"), - ADD_STAT(numVecAluAccesses, statistics::units::Count::get(), - "Number of vector alu accesses"), ADD_STAT(numMatAluAccesses, statistics::units::Count::get(), "Number of matrix alu accesses"), ADD_STAT(numCallsReturns, statistics::units::Count::get(), @@ -110,32 +104,6 @@ class SimpleExecContext : public ExecContext "Number of vector instructions"), ADD_STAT(numMatInsts, statistics::units::Count::get(), "Number of matrix instructions"), - ADD_STAT(numIntRegReads, statistics::units::Count::get(), - "Number of times the integer registers were read"), - ADD_STAT(numIntRegWrites, statistics::units::Count::get(), - "Number of times the integer registers were written"), - ADD_STAT(numFpRegReads, statistics::units::Count::get(), - "Number of times the floating registers were read"), - ADD_STAT(numFpRegWrites, statistics::units::Count::get(), - "Number of times the floating registers were written"), - ADD_STAT(numVecRegReads, statistics::units::Count::get(), - "Number of times the vector registers were read"), - ADD_STAT(numVecRegWrites, statistics::units::Count::get(), - "Number of times the vector registers were written"), - ADD_STAT(numVecPredRegReads, statistics::units::Count::get(), - "Number of times the predicate registers were read"), - ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(), - "Number of times the predicate registers were written"), - ADD_STAT(numCCRegReads, statistics::units::Count::get(), - "Number of times the CC registers were read"), - ADD_STAT(numCCRegWrites, statistics::units::Count::get(), - "Number of times the CC registers were written"), - ADD_STAT(numMiscRegReads, statistics::units::Count::get(), - "Number of times the Misc 
registers were read"), - ADD_STAT(numMiscRegWrites, statistics::units::Count::get(), - "Number of times the Misc registers were written"), - ADD_STAT(numMemRefs, statistics::units::Count::get(), - "Number of memory refs"), ADD_STAT(numLoadInsts, statistics::units::Count::get(), "Number of load instructions"), ADD_STAT(numStoreInsts, statistics::units::Count::get(), @@ -148,10 +116,6 @@ class SimpleExecContext : public ExecContext "Percentage of non-idle cycles"), ADD_STAT(idleFraction, statistics::units::Ratio::get(), "Percentage of idle cycles"), - ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(), - "ICache total stall cycles"), - ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(), - "DCache total stall cycles"), ADD_STAT(numPredictedBranches, statistics::units::Count::get(), "Number of branches predicted as taken"), ADD_STAT(numBranchMispred, statistics::units::Count::get(), @@ -159,36 +123,25 @@ class SimpleExecContext : public ExecContext ADD_STAT(statExecutedInstType, statistics::units::Count::get(), "Class of executed instruction."), numRegReads{ - &numIntRegReads, - &numFpRegReads, - &numVecRegReads, - &numVecRegReads, - &numVecPredRegReads, - &numMatRegReads, - &numCCRegReads + &(cpu->executeStats[thread->threadId()]->numIntRegReads), + &(cpu->executeStats[thread->threadId()]->numFpRegReads), + &(cpu->executeStats[thread->threadId()]->numVecRegReads), + &(cpu->executeStats[thread->threadId()]->numVecRegReads), + &(cpu->executeStats[thread->threadId()]->numVecPredRegReads), + &(cpu->executeStats[thread->threadId()]->numCCRegReads), + &numMatRegReads }, numRegWrites{ - &numIntRegWrites, - &numFpRegWrites, - &numVecRegWrites, - &numVecRegWrites, - &numVecPredRegWrites, - &numMatRegWrites, - &numCCRegWrites + &(cpu->executeStats[thread->threadId()]->numIntRegWrites), + &(cpu->executeStats[thread->threadId()]->numFpRegWrites), + &(cpu->executeStats[thread->threadId()]->numVecRegWrites), + 
&(cpu->executeStats[thread->threadId()]->numVecRegWrites), + &(cpu->executeStats[thread->threadId()] + ->numVecPredRegWrites), + &(cpu->executeStats[thread->threadId()]->numCCRegWrites), + &numMatRegWrites } { - numCCRegReads - .flags(statistics::nozero); - - numCCRegWrites - .flags(statistics::nozero); - - icacheStallCycles - .prereq(icacheStallCycles); - - dcacheStallCycles - .prereq(dcacheStallCycles); - statExecutedInstType .init(enums::Num_OpClass) .flags(statistics::total | statistics::pdf | statistics::dist); @@ -212,15 +165,6 @@ class SimpleExecContext : public ExecContext statistics::Scalar numInsts; statistics::Scalar numOps; - // Number of integer alu accesses - statistics::Scalar numIntAluAccesses; - - // Number of float alu accesses - statistics::Scalar numFpAluAccesses; - - // Number of vector alu accesses - statistics::Scalar numVecAluAccesses; - // Number of matrix alu accesses statistics::Scalar numMatAluAccesses; @@ -242,36 +186,11 @@ class SimpleExecContext : public ExecContext // Number of matrix instructions statistics::Scalar numMatInsts; - // Number of integer register file accesses - statistics::Scalar numIntRegReads; - statistics::Scalar numIntRegWrites; - - // Number of float register file accesses - statistics::Scalar numFpRegReads; - statistics::Scalar numFpRegWrites; - - // Number of vector register file accesses - mutable statistics::Scalar numVecRegReads; - statistics::Scalar numVecRegWrites; - - // Number of predicate register file accesses - mutable statistics::Scalar numVecPredRegReads; - statistics::Scalar numVecPredRegWrites; - // Number of matrix register file accesses mutable statistics::Scalar numMatRegReads; statistics::Scalar numMatRegWrites; - // Number of condition code register file accesses - statistics::Scalar numCCRegReads; - statistics::Scalar numCCRegWrites; - - // Number of misc register file accesses - statistics::Scalar numMiscRegReads; - statistics::Scalar numMiscRegWrites; - // Number of simulated memory 
references - statistics::Scalar numMemRefs; statistics::Scalar numLoadInsts; statistics::Scalar numStoreInsts; @@ -285,12 +204,6 @@ class SimpleExecContext : public ExecContext statistics::Average notIdleFraction; statistics::Formula idleFraction; - // Number of cycles stalled for I-cache responses - statistics::Scalar icacheStallCycles; - - // Number of cycles stalled for D-cache responses - statistics::Scalar dcacheStallCycles; - /// @{ /// Number of branches predicted as taken statistics::Scalar numPredictedBranches; @@ -361,7 +274,7 @@ class SimpleExecContext : public ExecContext RegVal readMiscRegOperand(const StaticInst *si, int idx) override { - execContextStats.numMiscRegReads++; + cpu->executeStats[thread->threadId()]->numMiscRegReads++; const RegId& reg = si->srcRegIdx(idx); assert(reg.is(MiscRegClass)); return thread->readMiscReg(reg.index()); @@ -370,7 +283,7 @@ class SimpleExecContext : public ExecContext void setMiscRegOperand(const StaticInst *si, int idx, RegVal val) override { - execContextStats.numMiscRegWrites++; + cpu->executeStats[thread->threadId()]->numMiscRegWrites++; const RegId& reg = si->destRegIdx(idx); assert(reg.is(MiscRegClass)); thread->setMiscReg(reg.index(), val); @@ -383,7 +296,7 @@ class SimpleExecContext : public ExecContext RegVal readMiscReg(int misc_reg) override { - execContextStats.numMiscRegReads++; + cpu->executeStats[thread->threadId()]->numMiscRegReads++; return thread->readMiscReg(misc_reg); } @@ -394,7 +307,7 @@ class SimpleExecContext : public ExecContext void setMiscReg(int misc_reg, RegVal val) override { - execContextStats.numMiscRegWrites++; + cpu->executeStats[thread->threadId()]->numMiscRegWrites++; thread->setMiscReg(misc_reg, val); } From e85cf4f717ddd764a7c84000427ae56bac084855 Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Thu, 19 Jan 2023 00:52:58 -0800 Subject: [PATCH 239/492] cpu: Move commit stats from simple to base cpu Created stat group CommitCPUStats in BaseCPU and moved stats from the simple cpu 
model. The stats moved from SimpleCPU are numCondCtrlInsts, numFpInsts, numIntInsts, numLoadInsts, numStoreInsts, numVecInsts. Moved committedControl of MinorCPU to BaseCPU::CommitCPUStats. In MinorCPU, this stat was a 2D vector, where the first dimension is the thread ID. In base it is now a 1D vector that is tied to a thread ID via the commitStats vector. The committedControl stat vector in CommitCPUStats is updated in the same way in all CPU models. The function updateComCtrlStats will update committedControl and the CPU models will call this function instead of updating committedControl directly. This function takes a StaticInstPtr as input, which Simple, Minor, and O3 CPU models are able to provide. Removed stat "branches" from O3 commit stage. This stat duplicates BaseCPU::CommitCPUStats::committedControl::IsControl. O3 commit stats floating, integer, loads, memRefs, vectorInstructions are replaced by numFpInsts, numIntInsts, numLoadInsts, numMemRefs, numVecInsts from BaseCPU::CommitCPUStats respectively. Implemented numStoreInsts from BaseCPU::CommitCPUStats for O3 commit stage. 
Change-Id: I362cec51513a404de56a02b450d7663327be20f5 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67391 Tested-by: kokoro Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce --- src/cpu/base.cc | 70 ++++++++++++++++++++++++++++++++++ src/cpu/base.hh | 32 ++++++++++++++++ src/cpu/minor/execute.cc | 37 +----------------- src/cpu/minor/stats.cc | 15 +------- src/cpu/minor/stats.hh | 6 --- src/cpu/o3/commit.cc | 52 ++++--------------------- src/cpu/o3/commit.hh | 12 ------ src/cpu/simple/base.cc | 19 ++++----- src/cpu/simple/exec_context.hh | 40 ------------------- 9 files changed, 121 insertions(+), 162 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index b10c731e17..8121307d50 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -194,9 +194,11 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) // create a stat group object for each thread on this core fetchStats.reserve(numThreads); executeStats.reserve(numThreads); + commitStats.reserve(numThreads); for (int i = 0; i < numThreads; i++) { fetchStats.emplace_back(new FetchCPUStats(this, i)); executeStats.emplace_back(new ExecuteCPUStats(this, i)); + commitStats.emplace_back(new CommitCPUStats(this, i)); } } @@ -922,4 +924,72 @@ ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id) .prereq(numVecRegWrites); } +BaseCPU:: +CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id) + : statistics::Group(parent, csprintf("commitStats%i", thread_id).c_str()), + ADD_STAT(numMemRefs, statistics::units::Count::get(), + "Number of memory references committed"), + ADD_STAT(numFpInsts, statistics::units::Count::get(), + "Number of float instructions"), + ADD_STAT(numIntInsts, statistics::units::Count::get(), + "Number of integer instructions"), + ADD_STAT(numLoadInsts, statistics::units::Count::get(), + "Number of load instructions"), + ADD_STAT(numStoreInsts, statistics::units::Count::get(), + "Number of store instructions"), + ADD_STAT(numVecInsts, 
statistics::units::Count::get(), + "Number of vector instructions"), + ADD_STAT(committedInstType, statistics::units::Count::get(), + "Class of committed instruction."), + ADD_STAT(committedControl, statistics::units::Count::get(), + "Class of control type instructions committed") +{ + committedInstType + .init(enums::Num_OpClass) + .flags(statistics::total | statistics::pdf | statistics::dist); + + for (unsigned i = 0; i < Num_OpClasses; ++i) { + committedInstType.subname(i, enums::OpClassStrings[i]); + } + + committedControl + .init(StaticInstFlags::Flags::Num_Flags) + .flags(statistics::nozero); + + for (unsigned i = 0; i < StaticInstFlags::Flags::Num_Flags; i++) { + committedControl.subname(i, StaticInstFlags::FlagsStrings[i]); + } +} + + +void +BaseCPU:: +CommitCPUStats::updateComCtrlStats(const StaticInstPtr staticInst) +{ + /* Add a count for every control instruction type */ + if (staticInst->isControl()) { + if (staticInst->isReturn()) { + committedControl[gem5::StaticInstFlags::Flags::IsReturn]++; + } + if (staticInst->isCall()) { + committedControl[gem5::StaticInstFlags::Flags::IsCall]++; + } + if (staticInst->isDirectCtrl()) { + committedControl[gem5::StaticInstFlags::Flags::IsDirectControl]++; + } + if (staticInst->isIndirectCtrl()) { + committedControl + [gem5::StaticInstFlags::Flags::IsIndirectControl]++; + } + if (staticInst->isCondCtrl()) { + committedControl[gem5::StaticInstFlags::Flags::IsCondControl]++; + } + if (staticInst->isUncondCtrl()) { + committedControl[gem5::StaticInstFlags::Flags::IsUncondControl]++; + } + committedControl[gem5::StaticInstFlags::Flags::IsControl]++; + } + +} + } // namespace gem5 diff --git a/src/cpu/base.hh b/src/cpu/base.hh index ad6fa469a3..5b2e97f8b0 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -738,8 +738,40 @@ class BaseCPU : public ClockedObject statistics::Scalar numDiscardedOps; }; + struct CommitCPUStats: public statistics::Group + { + CommitCPUStats(statistics::Group *parent, int thread_id); + + /* 
Number of committed memory references. */ + statistics::Scalar numMemRefs; + + /* Number of float instructions */ + statistics::Scalar numFpInsts; + + /* Number of int instructions */ + statistics::Scalar numIntInsts; + + /* number of load instructions */ + statistics::Scalar numLoadInsts; + + /* Number of store instructions */ + statistics::Scalar numStoreInsts; + + /* Number of vector instructions */ + statistics::Scalar numVecInsts; + + /* Number of instructions committed by type (OpClass) */ + statistics::Vector committedInstType; + + /* number of control instructions committed by control inst type */ + statistics::Vector committedControl; + void updateComCtrlStats(const StaticInstPtr staticInst); + + }; + std::vector> fetchStats; std::vector> executeStats; + std::vector> commitStats; }; } // namespace gem5 diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index d657de5225..5c0354bb8a 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -879,41 +879,8 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst) thread->numOp++; thread->threadStats.numOps++; cpu.stats.numOps++; - cpu.stats.committedInstType[inst->id.threadId] - [inst->staticInst->opClass()]++; - - /** Add a count for every control instruction */ - if (inst->staticInst->isControl()) { - if (inst->staticInst->isReturn()) { - cpu.stats.committedControl[inst->id.threadId] - [gem5::StaticInstFlags::Flags::IsReturn]++; - } - if (inst->staticInst->isCall()) { - cpu.stats.committedControl[inst->id.threadId] - [gem5::StaticInstFlags::Flags::IsCall]++; - } - if (inst->staticInst->isDirectCtrl()) { - cpu.stats.committedControl[inst->id.threadId] - [gem5::StaticInstFlags::Flags::IsDirectControl]++; - } - if (inst->staticInst->isIndirectCtrl()) { - cpu.stats.committedControl[inst->id.threadId] - [gem5::StaticInstFlags::Flags::IsIndirectControl]++; - } - if (inst->staticInst->isCondCtrl()) { - cpu.stats.committedControl[inst->id.threadId] - 
[gem5::StaticInstFlags::Flags::IsCondControl]++; - } - if (inst->staticInst->isUncondCtrl()) { - cpu.stats.committedControl[inst->id.threadId] - [gem5::StaticInstFlags::Flags::IsUncondControl]++; - - } - cpu.stats.committedControl[inst->id.threadId] - [gem5::StaticInstFlags::Flags::IsControl]++; - } - - + cpu.commitStats[inst->id.threadId] + ->committedInstType[inst->staticInst->opClass()]++; /* Set the CP SeqNum to the numOps commit number */ if (inst->traceData) diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc index 10e7573afd..b20ce95ec8 100644 --- a/src/cpu/minor/stats.cc +++ b/src/cpu/minor/stats.cc @@ -57,11 +57,7 @@ MinorStats::MinorStats(BaseCPU *base_cpu) "CPI: cycles per instruction"), ADD_STAT(ipc, statistics::units::Rate< statistics::units::Count, statistics::units::Cycle>::get(), - "IPC: instructions per cycle"), - ADD_STAT(committedInstType, statistics::units::Count::get(), - "Class of committed instruction"), - ADD_STAT(committedControl, statistics::units::Count::get(), - "Class of control type instructions committed") + "IPC: instructions per cycle") { quiesceCycles.prereq(quiesceCycles); @@ -72,15 +68,6 @@ MinorStats::MinorStats(BaseCPU *base_cpu) ipc.precision(6); ipc = numInsts / base_cpu->baseStats.numCycles; - committedInstType - .init(base_cpu->numThreads, enums::Num_OpClass) - .flags(statistics::total | statistics::pdf | statistics::dist); - committedInstType.ysubnames(enums::OpClassStrings); - - committedControl - .init(base_cpu->numThreads, StaticInstFlags::Flags::Num_Flags) - .flags(statistics::nozero); - committedControl.ysubnames(StaticInstFlags::FlagsStrings); } } // namespace minor diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh index e5d018679d..f7d5e71dfa 100644 --- a/src/cpu/minor/stats.hh +++ b/src/cpu/minor/stats.hh @@ -72,12 +72,6 @@ struct MinorStats : public statistics::Group statistics::Formula cpi; statistics::Formula ipc; - /** Number of instructions by type (OpClass) */ - statistics::Vector2d 
committedInstType; - - /** Number of branches commited */ - statistics::Vector2d committedControl; - }; } // namespace minor diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc index 38dce831b1..7419b2a2f9 100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@ -160,21 +160,10 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit) "Number of instructions committed"), ADD_STAT(opsCommitted, statistics::units::Count::get(), "Number of ops (including micro ops) committed"), - ADD_STAT(memRefs, statistics::units::Count::get(), - "Number of memory references committed"), - ADD_STAT(loads, statistics::units::Count::get(), "Number of loads committed"), ADD_STAT(amos, statistics::units::Count::get(), "Number of atomic instructions committed"), ADD_STAT(membars, statistics::units::Count::get(), "Number of memory barriers committed"), - ADD_STAT(branches, statistics::units::Count::get(), - "Number of branches committed"), - ADD_STAT(vectorInstructions, statistics::units::Count::get(), - "Number of committed Vector instructions."), - ADD_STAT(floating, statistics::units::Count::get(), - "Number of committed floating point instructions."), - ADD_STAT(integer, statistics::units::Count::get(), - "Number of committed integer instructions."), ADD_STAT(functionCalls, statistics::units::Count::get(), "Number of function calls committed."), ADD_STAT(committedInstType, statistics::units::Count::get(), @@ -200,14 +189,6 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit) .init(cpu->numThreads) .flags(total); - memRefs - .init(cpu->numThreads) - .flags(total); - - loads - .init(cpu->numThreads) - .flags(total); - amos .init(cpu->numThreads) .flags(total); @@ -216,22 +197,6 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit) .init(cpu->numThreads) .flags(total); - branches - .init(cpu->numThreads) - .flags(total); - - vectorInstructions - .init(cpu->numThreads) - .flags(total); - - floating - .init(cpu->numThreads) - .flags(total); - - integer - 
.init(cpu->numThreads) - .flags(total); - functionCalls .init(commit->numThreads) .flags(total); @@ -1396,21 +1361,20 @@ Commit::updateComInstStats(const DynInstPtr &inst) // // Control Instructions // - if (inst->isControl()) - stats.branches[tid]++; + cpu->commitStats[tid]->updateComCtrlStats(inst->staticInst); // // Memory references // if (inst->isMemRef()) { - stats.memRefs[tid]++; + cpu->commitStats[tid]->numMemRefs++; if (inst->isLoad()) { - stats.loads[tid]++; + cpu->commitStats[tid]->numLoadInsts++; } - if (inst->isAtomic()) { - stats.amos[tid]++; + if (inst->isStore()) { + cpu->commitStats[tid]->numStoreInsts++; } } @@ -1420,14 +1384,14 @@ Commit::updateComInstStats(const DynInstPtr &inst) // Integer Instruction if (inst->isInteger()) - stats.integer[tid]++; + cpu->commitStats[tid]->numIntInsts++; // Floating Point Instruction if (inst->isFloating()) - stats.floating[tid]++; + cpu->commitStats[tid]->numFpInsts++; // Vector Instruction if (inst->isVector()) - stats.vectorInstructions[tid]++; + cpu->commitStats[tid]->numVecInsts++; // Function Calls if (inst->isCall()) diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index cf4eaf5d92..6591360197 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -483,22 +483,10 @@ class Commit statistics::Vector instsCommitted; /** Total number of ops (including micro ops) committed. */ statistics::Vector opsCommitted; - /** Stat for the total number of committed memory references. */ - statistics::Vector memRefs; - /** Stat for the total number of committed loads. */ - statistics::Vector loads; /** Stat for the total number of committed atomics. */ statistics::Vector amos; /** Total number of committed memory barriers. */ statistics::Vector membars; - /** Total number of committed branches. 
*/ - statistics::Vector branches; - /** Total number of vector instructions */ - statistics::Vector vectorInstructions; - /** Total number of floating point instructions */ - statistics::Vector floating; - /** Total number of integer instructions */ - statistics::Vector integer; /** Total number of function calls */ statistics::Vector functionCalls; /** Committed instructions by instruction type (OpClass) */ diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index c8d9aeeb86..70da65953b 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -403,19 +403,19 @@ BaseSimpleCPU::postExecute() //integer alu accesses if (curStaticInst->isInteger()){ executeStats[t_info.thread->threadId()]->numIntAluAccesses++; - t_info.execContextStats.numIntInsts++; + commitStats[t_info.thread->threadId()]->numIntInsts++; } //float alu accesses if (curStaticInst->isFloating()){ executeStats[t_info.thread->threadId()]->numFpAluAccesses++; - t_info.execContextStats.numFpInsts++; + commitStats[t_info.thread->threadId()]->numFpInsts++; } //vector alu accesses if (curStaticInst->isVector()){ executeStats[t_info.thread->threadId()]->numVecAluAccesses++; - t_info.execContextStats.numVecInsts++; + commitStats[t_info.thread->threadId()]->numVecInsts++; } //Matrix alu accesses @@ -429,22 +429,19 @@ BaseSimpleCPU::postExecute() t_info.execContextStats.numCallsReturns++; } - //the number of branch predictions that will be made - if (curStaticInst->isCondCtrl()){ - t_info.execContextStats.numCondCtrlInsts++; - } - //result bus acceses if (curStaticInst->isLoad()){ - t_info.execContextStats.numLoadInsts++; + commitStats[t_info.thread->threadId()]->numLoadInsts++; } if (curStaticInst->isStore() || curStaticInst->isAtomic()){ - t_info.execContextStats.numStoreInsts++; + commitStats[t_info.thread->threadId()]->numStoreInsts++; } /* End power model statistics */ - t_info.execContextStats.statExecutedInstType[curStaticInst->opClass()]++; + commitStats[t_info.thread->threadId()] + 
->committedInstType[curStaticInst->opClass()]++; + commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst); if (FullSystem) traceFunctions(instAddr); diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index 00efd8593c..42d6181cf2 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -94,20 +94,8 @@ class SimpleExecContext : public ExecContext "Number of matrix alu accesses"), ADD_STAT(numCallsReturns, statistics::units::Count::get(), "Number of times a function call or return occured"), - ADD_STAT(numCondCtrlInsts, statistics::units::Count::get(), - "Number of instructions that are conditional controls"), - ADD_STAT(numIntInsts, statistics::units::Count::get(), - "Number of integer instructions"), - ADD_STAT(numFpInsts, statistics::units::Count::get(), - "Number of float instructions"), - ADD_STAT(numVecInsts, statistics::units::Count::get(), - "Number of vector instructions"), ADD_STAT(numMatInsts, statistics::units::Count::get(), "Number of matrix instructions"), - ADD_STAT(numLoadInsts, statistics::units::Count::get(), - "Number of load instructions"), - ADD_STAT(numStoreInsts, statistics::units::Count::get(), - "Number of store instructions"), ADD_STAT(numIdleCycles, statistics::units::Cycle::get(), "Number of idle cycles"), ADD_STAT(numBusyCycles, statistics::units::Cycle::get(), @@ -120,8 +108,6 @@ class SimpleExecContext : public ExecContext "Number of branches predicted as taken"), ADD_STAT(numBranchMispred, statistics::units::Count::get(), "Number of branch mispredictions"), - ADD_STAT(statExecutedInstType, statistics::units::Count::get(), - "Class of executed instruction."), numRegReads{ &(cpu->executeStats[thread->threadId()]->numIntRegReads), &(cpu->executeStats[thread->threadId()]->numFpRegReads), @@ -142,13 +128,6 @@ class SimpleExecContext : public ExecContext &numMatRegWrites } { - statExecutedInstType - .init(enums::Num_OpClass) - .flags(statistics::total | statistics::pdf | 
statistics::dist); - - for (unsigned i = 0; i < Num_OpClasses; ++i) { - statExecutedInstType.subname(i, enums::OpClassStrings[i]); - } idleFraction = statistics::constant(1.0) - notIdleFraction; numIdleCycles = idleFraction * cpu->baseStats.numCycles; @@ -171,18 +150,6 @@ class SimpleExecContext : public ExecContext // Number of function calls/returns statistics::Scalar numCallsReturns; - // Conditional control instructions; - statistics::Scalar numCondCtrlInsts; - - // Number of int instructions - statistics::Scalar numIntInsts; - - // Number of float instructions - statistics::Scalar numFpInsts; - - // Number of vector instructions - statistics::Scalar numVecInsts; - // Number of matrix instructions statistics::Scalar numMatInsts; @@ -190,10 +157,6 @@ class SimpleExecContext : public ExecContext mutable statistics::Scalar numMatRegReads; statistics::Scalar numMatRegWrites; - // Number of simulated memory references - statistics::Scalar numLoadInsts; - statistics::Scalar numStoreInsts; - // Number of idle cycles statistics::Formula numIdleCycles; @@ -211,9 +174,6 @@ class SimpleExecContext : public ExecContext statistics::Scalar numBranchMispred; /// @} - // Instruction mix histogram by OpClass - statistics::Vector statExecutedInstType; - std::array numRegReads; std::array numRegWrites; From c7b6e7809933d0d4d63506ef58f87d7265e0fb51 Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Thu, 19 Jan 2023 01:40:36 -0800 Subject: [PATCH 240/492] cpu: Move numInsts, numOps, ipc, cpi to BaseCPU In BaseCPU::BaseCPUStats, numInsts and numOps track per CPU core committed instructions and operations. In BaseCPU::FetchCPUStats, numInsts and numOps track per thread fetched instructions and operations. In BaseCPU::CommitCPUStats, numInsts and numOps track per thread committed instructions and operations. In BaseSimpleCPU, the countInst() function has been split into countInst(), countFetchInst(), and countCommitInst(). 
The stat count incrementation of countInst() has been removed and delegated to the other two functions. countFetchInst() increments numInsts and numOps of the FetchCPUStats group for a thread. countCommitInst() increments the numInsts and numOps of the CommitCPUStats group for a thread and of the BaseCPUStats group for a CPU core. These functions are called in the appropriate stage within timing.cc and atomic.cc. The call to countInst() is left unchanged. countFetchInst() is called in preExecute(). countCommitInst() is called in postExecute(). For MinorCPU, only the commit level numInsts and numOps stats have been implemented. IPC and CPI stats have been added to BaseCPUStats (core level) and CommitCPUStats (thread level). The formulas for the IPC and CPI stats in CommitCPUStats are set in the BaseCPU constructor, after the CommitCPUStats stat group object has been created. Change-Id: If893b331fe4a6908e4b4caf4a30f1b0aeb4c4266 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67392 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- src/cpu/base.cc | 38 +++++++++++++++++++++++++++++++++- src/cpu/base.hh | 20 ++++++++++++++++++ src/cpu/minor/execute.cc | 6 ++++-- src/cpu/minor/stats.cc | 18 +--------------- src/cpu/minor/stats.hh | 10 --------- src/cpu/simple/base.cc | 38 ++++++++++++++++++++++++++++++++-- src/cpu/simple/base.hh | 2 ++ src/cpu/simple/exec_context.hh | 8 ------- 8 files changed, 100 insertions(+), 40 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 8121307d50..67f8e7bfc0 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -198,7 +198,11 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) for (int i = 0; i < numThreads; i++) { fetchStats.emplace_back(new FetchCPUStats(this, i)); executeStats.emplace_back(new ExecuteCPUStats(this, i)); - commitStats.emplace_back(new CommitCPUStats(this, i)); + // create commitStat object for thread i and set ipc, cpi formulas + CommitCPUStats* commitStatptr = new 
CommitCPUStats(this, i); + commitStatptr->ipc = commitStatptr->numInsts / baseStats.numCycles; + commitStatptr->cpi = baseStats.numCycles / commitStatptr->numInsts; + commitStats.emplace_back(commitStatptr); } } @@ -392,13 +396,28 @@ BaseCPU::probeInstCommit(const StaticInstPtr &inst, Addr pc) BaseCPU:: BaseCPUStats::BaseCPUStats(statistics::Group *parent) : statistics::Group(parent), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions committed (core level)"), + ADD_STAT(numOps, statistics::units::Count::get(), + "Number of ops (including micro ops) committed (core level)"), ADD_STAT(numCycles, statistics::units::Cycle::get(), "Number of cpu cycles simulated"), + ADD_STAT(cpi, statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: cycles per instruction (core level)"), + ADD_STAT(ipc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: instructions per cycle (core level)"), ADD_STAT(numWorkItemsStarted, statistics::units::Count::get(), "Number of work items this cpu started"), ADD_STAT(numWorkItemsCompleted, statistics::units::Count::get(), "Number of work items this cpu completed") { + cpi.precision(6); + cpi = numCycles / numInsts; + + ipc.precision(6); + ipc = numInsts / numCycles; } void @@ -839,6 +858,10 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent) BaseCPU:: FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id) : statistics::Group(parent, csprintf("fetchStats%i", thread_id).c_str()), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions fetched (thread level)"), + ADD_STAT(numOps, statistics::units::Count::get(), + "Number of ops (including micro ops) fetched (thread level)"), ADD_STAT(numBranches, statistics::units::Count::get(), "Number of branches fetched"), ADD_STAT(numFetchSuspends, statistics::units::Count::get(), @@ -927,6 +950,16 @@ 
ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id) BaseCPU:: CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id) : statistics::Group(parent, csprintf("commitStats%i", thread_id).c_str()), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions committed (thread level)"), + ADD_STAT(numOps, statistics::units::Count::get(), + "Number of ops (including micro ops) committed (thread level)"), + ADD_STAT(cpi, statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: cycles per instruction (thread level)"), + ADD_STAT(ipc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: instructions per cycle (thread level)"), ADD_STAT(numMemRefs, statistics::units::Count::get(), "Number of memory references committed"), ADD_STAT(numFpInsts, statistics::units::Count::get(), @@ -944,6 +977,9 @@ CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id) ADD_STAT(committedControl, statistics::units::Count::get(), "Class of control type instructions committed") { + cpi.precision(6); + ipc.precision(6); + committedInstType .init(enums::Num_OpClass) .flags(statistics::total | statistics::pdf | statistics::dist); diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 5b2e97f8b0..06fc2a391d 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -633,8 +633,14 @@ class BaseCPU : public ClockedObject struct BaseCPUStats : public statistics::Group { BaseCPUStats(statistics::Group *parent); + // Number of CPU insts and ops committed at CPU core level + statistics::Scalar numInsts; + statistics::Scalar numOps; // Number of CPU cycles simulated statistics::Scalar numCycles; + /* CPI/IPC for total cycle counts and macro insts */ + statistics::Formula cpi; + statistics::Formula ipc; statistics::Scalar numWorkItemsStarted; statistics::Scalar numWorkItemsCompleted; } baseStats; @@ -683,6 +689,12 @@ class BaseCPU : public ClockedObject { 
FetchCPUStats(statistics::Group *parent, int thread_id); + /* Total number of instructions fetched */ + statistics::Scalar numInsts; + + /* Total number of operations fetched */ + statistics::Scalar numOps; + /* Total number of branches fetched */ statistics::Scalar numBranches; @@ -742,6 +754,14 @@ class BaseCPU : public ClockedObject { CommitCPUStats(statistics::Group *parent, int thread_id); + /* Number of simulated instructions committed */ + statistics::Scalar numInsts; + statistics::Scalar numOps; + + /* CPI/IPC for total cycle counts and macro insts */ + statistics::Formula cpi; + statistics::Formula ipc; + /* Number of committed memory references. */ statistics::Scalar numMemRefs; diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index 5c0354bb8a..2908c2266f 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -871,14 +871,16 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst) { thread->numInst++; thread->threadStats.numInsts++; - cpu.stats.numInsts++; + cpu.commitStats[inst->id.threadId]->numInsts++; + cpu.baseStats.numInsts++; /* Act on events related to instruction counts */ thread->comInstEventQueue.serviceEvents(thread->numInst); } thread->numOp++; thread->threadStats.numOps++; - cpu.stats.numOps++; + cpu.commitStats[inst->id.threadId]->numOps++; + cpu.baseStats.numOps++; cpu.commitStats[inst->id.threadId] ->committedInstType[inst->staticInst->opClass()]++; diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc index b20ce95ec8..e31cbe93a1 100644 --- a/src/cpu/minor/stats.cc +++ b/src/cpu/minor/stats.cc @@ -45,29 +45,13 @@ namespace minor MinorStats::MinorStats(BaseCPU *base_cpu) : statistics::Group(base_cpu), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of instructions committed"), - ADD_STAT(numOps, statistics::units::Count::get(), - "Number of ops (including micro ops) committed"), ADD_STAT(quiesceCycles, statistics::units::Cycle::get(), "Total number of cycles that CPU has spent 
quiesced or waiting " - "for an interrupt"), - ADD_STAT(cpi, statistics::units::Rate< - statistics::units::Cycle, statistics::units::Count>::get(), - "CPI: cycles per instruction"), - ADD_STAT(ipc, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "IPC: instructions per cycle") + "for an interrupt") { quiesceCycles.prereq(quiesceCycles); - cpi.precision(6); - cpi = base_cpu->baseStats.numCycles / numInsts; - - ipc.precision(6); - ipc = numInsts / base_cpu->baseStats.numCycles; - } } // namespace minor diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh index f7d5e71dfa..98ac80f15c 100644 --- a/src/cpu/minor/stats.hh +++ b/src/cpu/minor/stats.hh @@ -59,19 +59,9 @@ struct MinorStats : public statistics::Group { MinorStats(BaseCPU *parent); - /** Number of simulated instructions */ - statistics::Scalar numInsts; - - /** Number of simulated insts and microops */ - statistics::Scalar numOps; - /** Number of cycles in quiescent state */ statistics::Scalar quiesceCycles; - /** CPI/IPC for total cycle counts and macro insts */ - statistics::Formula cpi; - statistics::Formula ipc; - }; } // namespace minor diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 70da65953b..35d149097c 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -154,10 +154,36 @@ BaseSimpleCPU::countInst() if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { t_info.numInst++; - t_info.execContextStats.numInsts++; } t_info.numOp++; - t_info.execContextStats.numOps++; +} + +void +BaseSimpleCPU::countFetchInst() +{ + SimpleExecContext& t_info = *threadInfo[curThread]; + + if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { + // increment thread level numInsts fetched count + fetchStats[t_info.thread->threadId()]->numInsts++; + } + // increment thread level numOps fetched count + fetchStats[t_info.thread->threadId()]->numOps++; +} + +void +BaseSimpleCPU::countCommitInst() +{ + SimpleExecContext& 
t_info = *threadInfo[curThread]; + + if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { + // increment thread level and core level numInsts count + commitStats[t_info.thread->threadId()]->numInsts++; + baseStats.numInsts++; + } + // increment thread level and core level numOps count + commitStats[t_info.thread->threadId()]->numOps++; + baseStats.numOps++; } Counter @@ -376,6 +402,11 @@ BaseSimpleCPU::preExecute() if (predict_taken) ++t_info.execContextStats.numPredictedBranches; } + + // increment the fetch instruction stat counters + if (curStaticInst) { + countFetchInst(); + } } void @@ -443,6 +474,9 @@ BaseSimpleCPU::postExecute() ->committedInstType[curStaticInst->opClass()]++; commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst); + /* increment the committed numInsts and numOps stats */ + countCommitInst(); + if (FullSystem) traceFunctions(instAddr); diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index df5290cf3c..46a25a0a42 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -182,6 +182,8 @@ class BaseSimpleCPU : public BaseCPU } void countInst(); + void countFetchInst(); + void countCommitInst(); Counter totalInsts() const override; Counter totalOps() const override; diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index 42d6181cf2..c0927fcadd 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -86,10 +86,6 @@ class SimpleExecContext : public ExecContext : statistics::Group(cpu, csprintf("exec_context.thread_%i", thread->threadId()).c_str()), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of instructions committed"), - ADD_STAT(numOps, statistics::units::Count::get(), - "Number of ops (including micro ops) committed"), ADD_STAT(numMatAluAccesses, statistics::units::Count::get(), "Number of matrix alu accesses"), ADD_STAT(numCallsReturns, statistics::units::Count::get(), @@ -140,10 +136,6 @@ class SimpleExecContext 
: public ExecContext .prereq(numBranchMispred); } - // Number of simulated instructions - statistics::Scalar numInsts; - statistics::Scalar numOps; - // Number of matrix alu accesses statistics::Scalar numMatAluAccesses; From d943e42bdd0c4d7c0c3c70258306149ee341bb5a Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Thu, 19 Jan 2023 01:48:32 -0800 Subject: [PATCH 241/492] cpu-o3: Use base instructions committed counters in O3CPU Moved committedInsts from O3 cpu.* to BaseCPU as numInstsNotNOP because it tracks the instructions committed that are not NOPs or prefetches. This change also does the same for committedOps. InstsCommitted from O3 commit.*, which tracks all instructions committed, has been removed. CommitCPUStats::numInsts replaces it in O3. The same has been done for opsCommitted. Because IPC and CPI calculations are handled in BaseCPU, removed IPC and CPI stats from O3 cpu.*. Change-Id: I9f122c9a9dafccd5342f18056f282f3dad8b1b1e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67393 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- src/cpu/base.cc | 4 ++++ src/cpu/base.hh | 4 ++++ src/cpu/o3/commit.cc | 21 ++++++------------- src/cpu/o3/commit.hh | 4 ---- src/cpu/o3/cpu.cc | 49 +++----------------------------------------- src/cpu/o3/cpu.hh | 13 ------------ 6 files changed, 17 insertions(+), 78 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 67f8e7bfc0..fa30e4b5e6 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -954,6 +954,10 @@ CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id) "Number of instructions committed (thread level)"), ADD_STAT(numOps, statistics::units::Count::get(), "Number of ops (including micro ops) committed (thread level)"), + ADD_STAT(numInstsNotNOP, statistics::units::Count::get(), + "Number of instructions committed excluding NOPs or prefetches"), + ADD_STAT(numOpsNotNOP, statistics::units::Count::get(), + "Number of Ops (including micro ops) Simulated"),
ADD_STAT(cpi, statistics::units::Rate< statistics::units::Cycle, statistics::units::Count>::get(), "CPI: cycles per instruction (thread level)"), diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 06fc2a391d..a9af865da0 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -758,6 +758,10 @@ class BaseCPU : public ClockedObject statistics::Scalar numInsts; statistics::Scalar numOps; + /* Number of instructions committed that are not NOP or prefetches */ + statistics::Scalar numInstsNotNOP; + statistics::Scalar numOpsNotNOP; + /* CPI/IPC for total cycle counts and macro insts */ statistics::Formula cpi; statistics::Formula ipc; diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc index 7419b2a2f9..e1f01680ca 100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@ -156,10 +156,6 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit) "The number of times a branch was mispredicted"), ADD_STAT(numCommittedDist, statistics::units::Count::get(), "Number of insts commited each cycle"), - ADD_STAT(instsCommitted, statistics::units::Count::get(), - "Number of instructions committed"), - ADD_STAT(opsCommitted, statistics::units::Count::get(), - "Number of ops (including micro ops) committed"), ADD_STAT(amos, statistics::units::Count::get(), "Number of atomic instructions committed"), ADD_STAT(membars, statistics::units::Count::get(), @@ -181,14 +177,6 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit) .init(0,commit->commitWidth,1) .flags(statistics::pdf); - instsCommitted - .init(cpu->numThreads) - .flags(total); - - opsCommitted - .init(cpu->numThreads) - .flags(total); - amos .init(cpu->numThreads) .flags(total); @@ -1348,9 +1336,12 @@ Commit::updateComInstStats(const DynInstPtr &inst) { ThreadID tid = inst->threadNumber; - if (!inst->isMicroop() || inst->isLastMicroop()) - stats.instsCommitted[tid]++; - stats.opsCommitted[tid]++; + if (!inst->isMicroop() || inst->isLastMicroop()) { + cpu->commitStats[tid]->numInsts++; + 
cpu->baseStats.numInsts++; + } + cpu->commitStats[tid]->numOps++; + cpu->baseStats.numOps++; // To match the old model, don't count nops and instruction // prefetches towards the total commit count. diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 6591360197..eccd023d45 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -479,10 +479,6 @@ class Commit /** Distribution of the number of committed instructions each cycle. */ statistics::Distribution numCommittedDist; - /** Total number of instructions committed. */ - statistics::Vector instsCommitted; - /** Total number of ops (including micro ops) committed. */ - statistics::Vector opsCommitted; /** Stat for the total number of committed atomics. */ statistics::Vector amos; /** Total number of committed memory barriers. */ diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 90df3b349e..93c58fef63 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -328,23 +328,7 @@ CPU::CPUStats::CPUStats(CPU *cpu) "to idling"), ADD_STAT(quiesceCycles, statistics::units::Cycle::get(), "Total number of cycles that CPU has spent quiesced or waiting " - "for an interrupt"), - ADD_STAT(committedInsts, statistics::units::Count::get(), - "Number of Instructions Simulated"), - ADD_STAT(committedOps, statistics::units::Count::get(), - "Number of Ops (including micro ops) Simulated"), - ADD_STAT(cpi, statistics::units::Rate< - statistics::units::Cycle, statistics::units::Count>::get(), - "CPI: Cycles Per Instruction"), - ADD_STAT(totalCpi, statistics::units::Rate< - statistics::units::Cycle, statistics::units::Count>::get(), - "CPI: Total CPI of All Threads"), - ADD_STAT(ipc, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "IPC: Instructions Per Cycle"), - ADD_STAT(totalIpc, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "IPC: Total IPC of All Threads") + "for an interrupt") { // Register any of the O3CPU's stats here. 
timesIdled @@ -356,33 +340,6 @@ CPU::CPUStats::CPUStats(CPU *cpu) quiesceCycles .prereq(quiesceCycles); - // Number of Instructions simulated - // -------------------------------- - // Should probably be in Base CPU but need templated - // MaxThreads so put in here instead - committedInsts - .init(cpu->numThreads) - .flags(statistics::total); - - committedOps - .init(cpu->numThreads) - .flags(statistics::total); - - cpi - .precision(6); - cpi = cpu->baseStats.numCycles / committedInsts; - - totalCpi - .precision(6); - totalCpi = cpu->baseStats.numCycles / sum(committedInsts); - - ipc - .precision(6); - ipc = committedInsts / cpu->baseStats.numCycles; - - totalIpc - .precision(6); - totalIpc = sum(committedInsts) / cpu->baseStats.numCycles; } void @@ -1170,14 +1127,14 @@ CPU::instDone(ThreadID tid, const DynInstPtr &inst) if (!inst->isMicroop() || inst->isLastMicroop()) { thread[tid]->numInst++; thread[tid]->threadStats.numInsts++; - cpuStats.committedInsts[tid]++; + commitStats[tid]->numInstsNotNOP++; // Check for instruction-count-based events. thread[tid]->comInstEventQueue.serviceEvents(thread[tid]->numInst); } thread[tid]->numOp++; thread[tid]->threadStats.numOps++; - cpuStats.committedOps[tid]++; + commitStats[tid]->numOpsNotNOP++; probeInstCommit(inst->staticInst, inst->pcState().instAddr()); } diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 07775298af..7dc378428b 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -581,19 +581,6 @@ class CPU : public BaseCPU /** Stat for total number of cycles the CPU spends descheduled due to a * quiesce operation or waiting for an interrupt. */ statistics::Scalar quiesceCycles; - /** Stat for the number of committed instructions per thread. */ - statistics::Vector committedInsts; - /** Stat for the number of committed ops (including micro ops) per - * thread. */ - statistics::Vector committedOps; - /** Stat for the CPI per thread. */ - statistics::Formula cpi; - /** Stat for the total CPI. 
*/ - statistics::Formula totalCpi; - /** Stat for the IPC per thread. */ - statistics::Formula ipc; - /** Stat for the total IPC. */ - statistics::Formula totalIpc; } cpuStats; From 1c4cc8dbd04b6ae875ca920fcd2ce0ef00cd6b38 Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Thu, 19 Jan 2023 02:04:39 -0800 Subject: [PATCH 242/492] cpu-o3: Move general fetch stats to BaseCPU::FetchCPUStats The stats moved are from fetch.hh and fetch.cc of O3. Stat branches is now tracked by numBranches. Stat branchRate is now tracked by branchRate in FetchCPUStats. Stat rate is tracked by fetchRate. Stat insts is tracked by numInsts. Stat icacheStallCycles is tracked by icacheStallCycles in FetchCPUStats. Change-Id: I48313614edd078631df4ef6b00982c335798fcb1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67394 Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Bobby Bruce --- src/cpu/base.cc | 28 +++++++++++++++++++++++++++- src/cpu/base.hh | 9 +++++++++ src/cpu/o3/fetch.cc | 33 +++++---------------------------- src/cpu/o3/fetch.hh | 10 ---------- 4 files changed, 41 insertions(+), 39 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index fa30e4b5e6..490e48938a 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -196,8 +196,15 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) executeStats.reserve(numThreads); commitStats.reserve(numThreads); for (int i = 0; i < numThreads; i++) { - fetchStats.emplace_back(new FetchCPUStats(this, i)); + // create fetchStat object for thread i and set rate formulas + FetchCPUStats* fetchStatptr = new FetchCPUStats(this, i); + fetchStatptr->fetchRate = fetchStatptr->numInsts / baseStats.numCycles; + fetchStatptr->branchRate = fetchStatptr->numBranches / + baseStats.numCycles; + fetchStats.emplace_back(fetchStatptr); + executeStats.emplace_back(new ExecuteCPUStats(this, i)); + // create commitStat object for thread i and set ipc, cpi formulas CommitCPUStats* commitStatptr = new CommitCPUStats(this, i); 
commitStatptr->ipc = commitStatptr->numInsts / baseStats.numCycles; @@ -862,15 +869,31 @@ FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id) "Number of instructions fetched (thread level)"), ADD_STAT(numOps, statistics::units::Count::get(), "Number of ops (including micro ops) fetched (thread level)"), + ADD_STAT(fetchRate, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "Number of inst fetches per cycle"), ADD_STAT(numBranches, statistics::units::Count::get(), "Number of branches fetched"), + ADD_STAT(branchRate, statistics::units::Ratio::get(), + "Number of branch fetches per cycle"), + ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(), + "ICache total stall cycles"), ADD_STAT(numFetchSuspends, statistics::units::Count::get(), "Number of times Execute suspended instruction fetching") { + fetchRate + .flags(statistics::total); + numBranches .prereq(numBranches); + branchRate + .flags(statistics::total); + + icacheStallCycles + .prereq(icacheStallCycles); + } // means it is incremented in a vector indexing and not directly @@ -981,6 +1004,9 @@ CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id) ADD_STAT(committedControl, statistics::units::Count::get(), "Class of control type instructions committed") { + numInsts + .prereq(numInsts); + cpi.precision(6); ipc.precision(6); diff --git a/src/cpu/base.hh b/src/cpu/base.hh index a9af865da0..5d0d3cab01 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -695,9 +695,18 @@ class BaseCPU : public ClockedObject /* Total number of operations fetched */ statistics::Scalar numOps; + /* Number of instruction fetched per cycle. */ + statistics::Formula fetchRate; + /* Total number of branches fetched */ statistics::Scalar numBranches; + /* Number of branch fetches per cycle. 
*/ + statistics::Formula branchRate; + + /* Number of cycles stalled due to an icache miss */ + statistics::Scalar icacheStallCycles; + /* Number of times fetch was asked to suspend by Execute */ statistics::Scalar numFetchSuspends; diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index d3cdd2c761..f5fc6c62ec 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -158,12 +158,6 @@ Fetch::regProbePoints() Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch) : statistics::Group(cpu, "fetch"), - ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(), - "Number of cycles fetch is stalled on an Icache miss"), - ADD_STAT(insts, statistics::units::Count::get(), - "Number of instructions fetch has processed"), - ADD_STAT(branches, statistics::units::Count::get(), - "Number of branches that fetch encountered"), ADD_STAT(predictedBranches, statistics::units::Count::get(), "Number of branches that fetch has predicted taken"), ADD_STAT(cycles, statistics::units::Cycle::get(), @@ -200,21 +194,8 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch) "Number of instructions fetched each cycle (Total)"), ADD_STAT(idleRate, statistics::units::Ratio::get(), "Ratio of cycles fetch was idle", - idleCycles / cpu->baseStats.numCycles), - ADD_STAT(branchRate, statistics::units::Ratio::get(), - "Number of branch fetches per cycle", - branches / cpu->baseStats.numCycles), - ADD_STAT(rate, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "Number of inst fetches per cycle", - insts / cpu->baseStats.numCycles) + idleCycles / cpu->baseStats.numCycles) { - icacheStallCycles - .prereq(icacheStallCycles); - insts - .prereq(insts); - branches - .prereq(branches); predictedBranches .prereq(predictedBranches); cycles @@ -252,10 +233,6 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch) .flags(statistics::pdf); idleRate .prereq(idleRate); - branchRate - .flags(statistics::total); - rate - 
.flags(statistics::total); } void Fetch::setTimeBuffer(TimeBuffer *time_buffer) @@ -540,7 +517,7 @@ Fetch::lookupAndUpdateNextPC(const DynInstPtr &inst, PCStateBase &next_pc) inst->setPredTarg(next_pc); inst->setPredTaken(predict_taken); - ++fetchStats.branches; + cpu->fetchStats[tid]->numBranches++; if (predict_taken) { ++fetchStats.predictedBranches; @@ -1146,7 +1123,7 @@ Fetch::fetch(bool &status_change) fetchCacheLine(fetchAddr, tid, this_pc.instAddr()); if (fetchStatus[tid] == IcacheWaitResponse) - ++fetchStats.icacheStallCycles; + cpu->fetchStats[tid]->icacheStallCycles++; else if (fetchStatus[tid] == ItlbWait) ++fetchStats.tlbCycles; else @@ -1242,7 +1219,7 @@ Fetch::fetch(bool &status_change) staticInst = dec_ptr->decode(this_pc); // Increment stat of fetched instructions. - ++fetchStats.insts; + cpu->fetchStats[tid]->numInsts++; if (staticInst->isMacroop()) { curMacroop = staticInst; @@ -1572,7 +1549,7 @@ Fetch::profileStall(ThreadID tid) ++fetchStats.squashCycles; DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid); } else if (fetchStatus[tid] == IcacheWaitResponse) { - ++fetchStats.icacheStallCycles; + cpu->fetchStats[tid]->icacheStallCycles++; DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n", tid); } else if (fetchStatus[tid] == ItlbWait) { diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index cd311913f5..6add31444d 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -533,12 +533,6 @@ class Fetch FetchStatGroup(CPU *cpu, Fetch *fetch); // @todo: Consider making these // vectors and tracking on a per thread basis. - /** Stat for total number of cycles stalled due to an icache miss. */ - statistics::Scalar icacheStallCycles; - /** Stat for total number of fetched instructions. */ - statistics::Scalar insts; - /** Total number of fetched branches. */ - statistics::Scalar branches; /** Stat for total number of predicted branches. */ statistics::Scalar predictedBranches; /** Stat for total number of cycles spent fetching. 
*/ @@ -581,10 +575,6 @@ class Fetch statistics::Distribution nisnDist; /** Rate of how often fetch was idle. */ statistics::Formula idleRate; - /** Number of branch fetches per cycle. */ - statistics::Formula branchRate; - /** Number of instruction fetched per cycle. */ - statistics::Formula rate; } fetchStats; }; From 0974fe6f24ce748057b5b1a3002ebac75d11b397 Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Thu, 19 Jan 2023 02:11:11 -0800 Subject: [PATCH 243/492] cpu-o3: Move O3 IEW stats to BaseCPU::ExecuteCPUStats Moved numInsts, numBranches, numNop, numRefs, numLoadInsts, numRate to Base. Merged numRefs into numMemRefs of ExecuteCPUStats. Renamed numRate to instRate. Updated formatting in ExecuteCPUStats group. Change-Id: I1fd3a989d917eb2ffaa865b067b80e266d6f55bc Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67395 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- src/cpu/base.cc | 53 ++++++++++++++++++++++++++++++++--------------- src/cpu/base.hh | 13 ++++++++++++ src/cpu/o3/iew.cc | 50 ++++++-------------------------------------- src/cpu/o3/iew.hh | 14 ------------- 4 files changed, 55 insertions(+), 75 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 490e48938a..cee76472f5 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -203,7 +203,11 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) baseStats.numCycles; fetchStats.emplace_back(fetchStatptr); - executeStats.emplace_back(new ExecuteCPUStats(this, i)); + // create executeStat object for thread i and set rate formulas + ExecuteCPUStats* executeStatptr = new ExecuteCPUStats(this, i); + executeStatptr->instRate = executeStatptr->numInsts / + baseStats.numCycles; + executeStats.emplace_back(executeStatptr); // create commitStat object for thread i and set ipc, cpi formulas CommitCPUStats* commitStatptr = new CommitCPUStats(this, i); @@ -900,6 +904,19 @@ FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id) BaseCPU:: 
ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id) : statistics::Group(parent, csprintf("executeStats%i", thread_id).c_str()), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of executed instructions"), + ADD_STAT(numNop, statistics::units::Count::get(), + "Number of nop insts executed"), + ADD_STAT(numBranches, statistics::units::Count::get(), + "Number of branches executed"), + ADD_STAT(numLoadInsts, statistics::units::Count::get(), + "Number of load instructions executed"), + ADD_STAT(numStoreInsts, statistics::units::Count::get(), + "Number of stores executed"), + ADD_STAT(instRate, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "Inst execution rate"), ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(), "DCache total stall cycles"), ADD_STAT(numCCRegReads, statistics::units::Count::get(), @@ -938,36 +955,38 @@ ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id) "Number of ops (including micro ops) which were discarded before " "commit") { + numStoreInsts = numMemRefs - numLoadInsts; + dcacheStallCycles - .prereq(dcacheStallCycles); + .prereq(dcacheStallCycles); numCCRegReads - .prereq(numCCRegReads) - .flags(statistics::nozero); + .prereq(numCCRegReads) + .flags(statistics::nozero); numCCRegWrites - .prereq(numCCRegWrites) - .flags(statistics::nozero); + .prereq(numCCRegWrites) + .flags(statistics::nozero); numFpAluAccesses - .prereq(numFpAluAccesses); + .prereq(numFpAluAccesses); numFpRegReads - .prereq(numFpRegReads); + .prereq(numFpRegReads); numIntAluAccesses - .prereq(numIntAluAccesses); + .prereq(numIntAluAccesses); numIntRegReads - .prereq(numIntRegReads); + .prereq(numIntRegReads); numIntRegWrites - .prereq(numIntRegWrites); + .prereq(numIntRegWrites); numMiscRegReads - .prereq(numMiscRegReads); + .prereq(numMiscRegReads); numMiscRegWrites - .prereq(numMiscRegWrites); + .prereq(numMiscRegWrites); numVecPredRegReads - 
.prereq(numVecPredRegReads); + .prereq(numVecPredRegReads); numVecPredRegWrites - .prereq(numVecPredRegWrites); + .prereq(numVecPredRegWrites); numVecRegReads - .prereq(numVecRegReads); + .prereq(numVecRegReads); numVecRegWrites - .prereq(numVecRegWrites); + .prereq(numVecRegWrites); } BaseCPU:: diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 5d0d3cab01..fc22abc5aa 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -716,6 +716,19 @@ class BaseCPU : public ClockedObject { ExecuteCPUStats(statistics::Group *parent, int thread_id); + /* Stat for total number of executed instructions */ + statistics::Scalar numInsts; + /* Number of executed nops */ + statistics::Scalar numNop; + /* Number of executed branches */ + statistics::Scalar numBranches; + /* Stat for total number of executed load instructions */ + statistics::Scalar numLoadInsts; + /* Number of executed store instructions */ + statistics::Formula numStoreInsts; + /* Number of instructions executed per cycle */ + statistics::Formula instRate; + /* Number of cycles stalled for D-cache responses */ statistics::Scalar dcacheStallCycles; diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc index 7cf6c54542..92d281ce93 100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@ -217,52 +217,14 @@ IEW::IEWStats::IEWStats(CPU *cpu) IEW::IEWStats::ExecutedInstStats::ExecutedInstStats(CPU *cpu) : statistics::Group(cpu), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of executed instructions"), - ADD_STAT(numLoadInsts, statistics::units::Count::get(), - "Number of load instructions executed"), ADD_STAT(numSquashedInsts, statistics::units::Count::get(), "Number of squashed instructions skipped in execute"), ADD_STAT(numSwp, statistics::units::Count::get(), - "Number of swp insts executed"), - ADD_STAT(numNop, statistics::units::Count::get(), - "Number of nop insts executed"), - ADD_STAT(numRefs, statistics::units::Count::get(), - "Number of memory reference insts executed"), - ADD_STAT(numBranches, 
statistics::units::Count::get(), - "Number of branches executed"), - ADD_STAT(numStoreInsts, statistics::units::Count::get(), - "Number of stores executed"), - ADD_STAT(numRate, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "Inst execution rate", numInsts / cpu->baseStats.numCycles) + "Number of swp insts executed") { - numLoadInsts - .init(cpu->numThreads) - .flags(statistics::total); - numSwp .init(cpu->numThreads) .flags(statistics::total); - - numNop - .init(cpu->numThreads) - .flags(statistics::total); - - numRefs - .init(cpu->numThreads) - .flags(statistics::total); - - numBranches - .init(cpu->numThreads) - .flags(statistics::total); - - numStoreInsts - .flags(statistics::total); - numStoreInsts = numRefs - numLoadInsts; - - numRate - .flags(statistics::total); } void @@ -1053,7 +1015,7 @@ IEW::dispatchInsts(ThreadID tid) instQueue.recordProducer(inst); - iewStats.executedInstStats.numNop[tid]++; + cpu->executeStats[tid]->numNop++; add_to_iq = false; } else { @@ -1561,7 +1523,7 @@ IEW::updateExeInstStats(const DynInstPtr& inst) { ThreadID tid = inst->threadNumber; - iewStats.executedInstStats.numInsts++; + cpu->executeStats[tid]->numInsts++; #if TRACING_ON if (debug::O3PipeView) { @@ -1573,16 +1535,16 @@ IEW::updateExeInstStats(const DynInstPtr& inst) // Control operations // if (inst->isControl()) - iewStats.executedInstStats.numBranches[tid]++; + cpu->executeStats[tid]->numBranches++; // // Memory operations // if (inst->isMemRef()) { - iewStats.executedInstStats.numRefs[tid]++; + cpu->executeStats[tid]->numMemRefs++; if (inst->isLoad()) { - iewStats.executedInstStats.numLoadInsts[tid]++; + cpu->executeStats[tid]->numLoadInsts++; } } } diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 80fed295df..4fe8227dcc 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -455,25 +455,11 @@ class IEW { ExecutedInstStats(CPU *cpu); - /** Stat for total number of executed instructions. 
*/ - statistics::Scalar numInsts; - /** Stat for total number of executed load instructions. */ - statistics::Vector numLoadInsts; /** Stat for total number of squashed instructions skipped at * execute. */ statistics::Scalar numSquashedInsts; /** Number of executed software prefetches. */ statistics::Vector numSwp; - /** Number of executed nops. */ - statistics::Vector numNop; - /** Number of executed meomory references. */ - statistics::Vector numRefs; - /** Number of executed branches. */ - statistics::Vector numBranches; - /** Number of executed store instructions. */ - statistics::Formula numStoreInsts; - /** Number of instructions executed per cycle. */ - statistics::Formula numRate; } executedInstStats; /** Number of instructions sent to commit. */ From 457d70df626a8cb0a7fa0ce63b3d3e0886a2bbda Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Thu, 19 Jan 2023 02:12:49 -0800 Subject: [PATCH 244/492] cpu-kvm: Implement IPC and CPI base stats for KVM CPU Replaced committedInsts stats of KVM CPU with commitStats.numInsts of BaseCPU. This results in IPC and CPI printing in stats.txt for KVM simulations. 
Change-Id: I02395630fc50a69adebf11f4ed39d9cefb852e1f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67396 Reviewed-by: Andreas Sandberg Maintainer: Andreas Sandberg Tested-by: kokoro --- src/cpu/kvm/base.cc | 5 ++--- src/cpu/kvm/base.hh | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc index b76bddc2fd..e22e1628d2 100644 --- a/src/cpu/kvm/base.cc +++ b/src/cpu/kvm/base.cc @@ -261,8 +261,6 @@ BaseKvmCPU::restartEqThread() BaseKvmCPU::StatGroup::StatGroup(statistics::Group *parent) : statistics::Group(parent), - ADD_STAT(committedInsts, statistics::units::Count::get(), - "Number of instructions committed"), ADD_STAT(numVMExits, statistics::units::Count::get(), "total number of KVM exits"), ADD_STAT(numVMHalfEntries, statistics::units::Count::get(), @@ -778,7 +776,8 @@ BaseKvmCPU::kvmRun(Tick ticks) /* Update statistics */ baseStats.numCycles += simCyclesExecuted;; - stats.committedInsts += instsExecuted; + commitStats[thread->threadId()]->numInsts += instsExecuted; + baseStats.numInsts += instsExecuted; ctrInsts += instsExecuted; DPRINTF(KvmRun, diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh index 2d81c7c7eb..7bbf393f9b 100644 --- a/src/cpu/kvm/base.hh +++ b/src/cpu/kvm/base.hh @@ -804,7 +804,6 @@ class BaseKvmCPU : public BaseCPU struct StatGroup : public statistics::Group { StatGroup(statistics::Group *parent); - statistics::Scalar committedInsts; statistics::Scalar numVMExits; statistics::Scalar numVMHalfEntries; statistics::Scalar numExitSignal; From 8a9a629bdb346b49d592d11367c2b6ba76702d52 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Fri, 17 Feb 2023 20:20:36 +0800 Subject: [PATCH 245/492] arch-riscv: Support PMP lock feature The lock feature will let M mode do memory permission check before R/W/X data. If the lock bit of pmpicfg is set, then pmpicfg and pmpaddri will ignore later updates until CPU reset, and updates to pmpaddri-1 will also be ignored if the A field is set to TOR.
The following is added in this CL: 1. Add a condition to run the PMP check when any lock bit in the PMP table is set 2. Add a PMP_LOCK bit check when trying to update pmpaddr and pmpcfg 3. If no PMP entry matches and the privilege mode is M, no fault is generated 4. If the address matches a PMP entry, return no fault if the privilege mode is M and the lock bit is not set For more details about PMP, please see the RISC-V Spec Volume II, Privileged Architecture, Ver 1.12, Section 3.7 Physical Memory Protection Change-Id: I3e7c5824d6c05f2ea928ee9ec7714f7271e4c58c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68057 Reviewed-by: Ayaz Akram Tested-by: kokoro Reviewed-by: Yu-hsin Wang Maintainer: Bobby Bruce --- src/arch/riscv/faults.cc | 10 ++++++ src/arch/riscv/isa.cc | 18 +++++++--- src/arch/riscv/pmp.cc | 75 ++++++++++++++++++++++++++++------------ src/arch/riscv/pmp.hh | 19 ++++++++-- 4 files changed, 93 insertions(+), 29 deletions(-) diff --git a/src/arch/riscv/faults.cc b/src/arch/riscv/faults.cc index 3469c71252..940f7107ba 100644 --- a/src/arch/riscv/faults.cc +++ b/src/arch/riscv/faults.cc @@ -33,6 +33,8 @@ #include "arch/riscv/insts/static_inst.hh" #include "arch/riscv/isa.hh" +#include "arch/riscv/mmu.hh" +#include "arch/riscv/pmp.hh" #include "arch/riscv/regs/misc.hh" #include "arch/riscv/utility.hh" #include "cpu/base.hh" @@ -180,6 +182,14 @@ Reset::invoke(ThreadContext *tc, const StaticInstPtr &inst) tc->getIsaPtr()->newPCState(workload->getEntry()))); panic_if(!new_pc, "Failed create new PCState from ISA pointer"); tc->pcState(*new_pc); + + // Reset PMP Cfg + auto* mmu = dynamic_cast(tc->getMMUPtr()); + if (mmu == nullptr) { + warn("MMU is not Riscv MMU instance, we can't reset PMP"); + return; + } + mmu->getPMP()->pmpReset(); } void diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index 3809c61d63..d778957b9e 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -2,6 +2,7 @@ * Copyright (c) 2016 RISC-V Foundation * Copyright (c) 2016 The
University of Virginia * Copyright (c) 2020 Barkhausen Institut + * Copyright (c) 2022 Google LLC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -544,6 +545,8 @@ ISA::setMiscReg(RegIndex idx, RegVal val) // qemu seems to update the tables when // pmp addr regs are written (with the assumption // that cfg regs are already written) + RegVal res = 0; + RegVal old_val = readMiscRegNoEffect(idx); for (int i=0; i < regSize; i++) { @@ -554,10 +557,15 @@ ISA::setMiscReg(RegIndex idx, RegVal val) // Form pmp_index using the index i and // PMPCFG register number uint32_t pmp_index = i+(4*(idx-MISCREG_PMPCFG0)); - mmu->getPMP()->pmpUpdateCfg(pmp_index,cfg_val); + bool result = mmu->getPMP()->pmpUpdateCfg(pmp_index,cfg_val); + if (result) { + res |= ((RegVal)cfg_val << (8*i)); + } else { + res |= (old_val & (0xFF << (8*i))); + } } - setMiscRegNoEffect(idx, val); + setMiscRegNoEffect(idx, res); } break; case MISCREG_PMPADDR00 ... MISCREG_PMPADDR15: @@ -568,9 +576,9 @@ ISA::setMiscReg(RegIndex idx, RegVal val) auto mmu = dynamic_cast (tc->getMMUPtr()); uint32_t pmp_index = idx-MISCREG_PMPADDR00; - mmu->getPMP()->pmpUpdateAddr(pmp_index, val); - - setMiscRegNoEffect(idx, val); + if (mmu->getPMP()->pmpUpdateAddr(pmp_index, val)) { + setMiscRegNoEffect(idx, val); + } } break; diff --git a/src/arch/riscv/pmp.cc b/src/arch/riscv/pmp.cc index 77ef98f2d0..940af47686 100644 --- a/src/arch/riscv/pmp.cc +++ b/src/arch/riscv/pmp.cc @@ -1,5 +1,6 @@ /* * Copyright (c) 2021 The Regents of the University of California + * Copyright (c) 2023 Google LLC * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -27,7 +28,6 @@ */ #include "arch/riscv/pmp.hh" - #include "arch/generic/tlb.hh" #include "arch/riscv/faults.hh" #include "arch/riscv/isa.hh" @@ -47,7 +47,8 @@ namespace gem5 PMP::PMP(const Params ¶ms) : SimObject(params), pmpEntries(params.pmp_entries), - numRules(0) + numRules(0), + hasLockEntry(false) { pmpTable.resize(pmpEntries); } @@ -70,10 +71,7 @@ PMP::pmpCheck(const RequestPtr &req, BaseMMU::Mode mode, req->getPaddr()); } - // An access should be successful if there are - // no rules defined yet or we are in M mode (based - // on specs v1.10) - if (numRules == 0 || (pmode == RiscvISA::PrivilegeMode::PRV_M)) + if (numRules == 0) return NoFault; // match_index will be used to identify the pmp entry @@ -94,20 +92,19 @@ PMP::pmpCheck(const RequestPtr &req, BaseMMU::Mode mode, if ((match_index > -1) && (PMP_OFF != pmpGetAField(pmpTable[match_index].pmpCfg))) { - // check the RWX permissions from the pmp entry - uint8_t allowed_privs = PMP_READ | PMP_WRITE | PMP_EXEC; + uint8_t this_cfg = pmpTable[match_index].pmpCfg; - // i is the index of pmp table which matched - allowed_privs &= pmpTable[match_index].pmpCfg; - - if ((mode == BaseMMU::Mode::Read) && - (PMP_READ & allowed_privs)) { + if ((pmode == RiscvISA::PrivilegeMode::PRV_M) && + (PMP_LOCK & this_cfg) == 0) { + return NoFault; + } else if ((mode == BaseMMU::Mode::Read) && + (PMP_READ & this_cfg)) { return NoFault; } else if ((mode == BaseMMU::Mode::Write) && - (PMP_WRITE & allowed_privs)) { + (PMP_WRITE & this_cfg)) { return NoFault; } else if ((mode == BaseMMU::Mode::Execute) && - (PMP_EXEC & allowed_privs)) { + (PMP_EXEC & this_cfg)) { return NoFault; } else { if (req->hasVaddr()) { @@ -119,7 +116,9 @@ PMP::pmpCheck(const RequestPtr &req, BaseMMU::Mode mode, } } // if no entry matched and we are not in M mode return fault - if (req->hasVaddr()) { + if (pmode == RiscvISA::PrivilegeMode::PRV_M) { + return NoFault; + } else if 
(req->hasVaddr()) { return createAddrfault(req->getVaddr(), mode); } else { return createAddrfault(vaddr, mode); @@ -150,17 +149,19 @@ PMP::pmpGetAField(uint8_t cfg) } -void +bool PMP::pmpUpdateCfg(uint32_t pmp_index, uint8_t this_cfg) { DPRINTF(PMP, "Update pmp config with %u for pmp entry: %u \n", (unsigned)this_cfg, pmp_index); - - warn_if((PMP_LOCK & this_cfg), "pmp lock feature is not supported.\n"); - + if (pmpTable[pmp_index].pmpCfg & PMP_LOCK) { + DPRINTF(PMP, "Update pmp entry config %u failed because it locked\n", + pmp_index); + return false; + } pmpTable[pmp_index].pmpCfg = this_cfg; pmpUpdateRule(pmp_index); - + return true; } void @@ -170,6 +171,7 @@ PMP::pmpUpdateRule(uint32_t pmp_index) // pmpaddr/pmpcfg is written numRules = 0; + hasLockEntry = false; Addr prevAddr = 0; if (pmp_index >= 1) { @@ -209,15 +211,42 @@ PMP::pmpUpdateRule(uint32_t pmp_index) if (PMP_OFF != a_field) { numRules++; } + hasLockEntry |= ((pmpTable[i].pmpCfg & PMP_LOCK) != 0); + } + + if (hasLockEntry) { + DPRINTF(PMP, "Find lock entry\n"); } } void +PMP::pmpReset() +{ + for (uint32_t i = 0; i < pmpTable.size(); i++) { + pmpTable[i].pmpCfg &= ~(PMP_A_MASK | PMP_LOCK); + pmpUpdateRule(i); + } +} + +bool PMP::pmpUpdateAddr(uint32_t pmp_index, Addr this_addr) { DPRINTF(PMP, "Update pmp addr %#x for pmp entry %u \n", this_addr, pmp_index); + if (pmpTable[pmp_index].pmpCfg & PMP_LOCK) { + DPRINTF(PMP, "Update pmp entry %u failed because the lock bit set\n", + pmp_index); + return false; + } else if (pmp_index < pmpTable.size() - 1 && + ((pmpTable[pmp_index+1].pmpCfg & PMP_LOCK) != 0) && + pmpGetAField(pmpTable[pmp_index+1].pmpCfg) == PMP_TOR) { + DPRINTF(PMP, "Update pmp entry %u failed because the entry %u lock bit set" + "and A field is TOR\n", + pmp_index, pmp_index+1); + return false; + } + // just writing the raw addr in the pmp table // will convert it into a range, once cfg // reg is written @@ -225,6 +254,8 @@ PMP::pmpUpdateAddr(uint32_t pmp_index, Addr this_addr) for (int 
index = 0; index < pmpEntries; index++) { pmpUpdateRule(index); } + + return true; } bool @@ -247,7 +278,7 @@ PMP::shouldCheckPMP(RiscvISA::PrivilegeMode pmode, bool cond3 = (mode != BaseMMU::Execute && (status.mprv) && (status.mpp != RiscvISA::PrivilegeMode::PRV_M)); - return (cond1 || cond2 || cond3); + return (cond1 || cond2 || cond3 || hasLockEntry); } AddrRange diff --git a/src/arch/riscv/pmp.hh b/src/arch/riscv/pmp.hh index 1509646850..24cb4ad1ca 100644 --- a/src/arch/riscv/pmp.hh +++ b/src/arch/riscv/pmp.hh @@ -1,5 +1,6 @@ /* * Copyright (c) 2021 The Regents of the University of California + * Copyright (c) 2023 Google LLC * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -85,12 +86,18 @@ class PMP : public SimObject /** pmpcfg address range execute permission mask */ const uint8_t PMP_EXEC = 1 << 2; + /** pmpcfg A field mask */ + const uint8_t PMP_A_MASK = 3 << 3; + /** pmpcfg address range locked mask */ const uint8_t PMP_LOCK = 1 << 7; /** variable to keep track of active number of rules any time */ int numRules; + /** variable to keep track of any lock of entry */ + bool hasLockEntry; + /** single pmp entry struct*/ struct PmpEntry { @@ -127,8 +134,9 @@ class PMP : public SimObject * rule of corresponding pmp entry. * @param pmp_index pmp entry index. * @param this_cfg value to be written to pmpcfg. + * @returns true if update pmpicfg success */ - void pmpUpdateCfg(uint32_t pmp_index, uint8_t this_cfg); + bool pmpUpdateCfg(uint32_t pmp_index, uint8_t this_cfg); /** * pmpUpdateAddr updates the pmpaddr for a pmp @@ -136,8 +144,15 @@ class PMP : public SimObject * rule of corresponding pmp entry. * @param pmp_index pmp entry index. * @param this_addr value to be written to pmpaddr. + * @returns true if update pmpaddri success */ - void pmpUpdateAddr(uint32_t pmp_index, Addr this_addr); + bool pmpUpdateAddr(uint32_t pmp_index, Addr this_addr); + + /** + * pmpReset reset when reset signal in trigger from + * CPU. 
+ */ + void pmpReset(); private: /** From b440355cbce50c189ed7a3d42586f6eb0fc5887f Mon Sep 17 00:00:00 2001 From: Matt Sinclair Date: Sat, 7 Jan 2023 16:08:11 -0600 Subject: [PATCH 246/492] tests: cleanup m5out directly in weekly The weekly test script was implicitly assuming that no m5out directory existed in the folder where the script was run. However, if a prior test ran and failed, it would not clean up its m5out directory, causing the weekly tests to fail. This commit resolves this by removing the m5out directory before trying to run any tests in the weekly script. Moreover, we also update the weekly script to explicitly remove this m5out directory at the end of the script. Change-Id: If10c59034528e171cc2c5dacb928b3a81d6b8c50 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67198 Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce Tested-by: kokoro --- tests/weekly.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/weekly.sh b/tests/weekly.sh index c7f834b7a5..f9d3e4bc04 100755 --- a/tests/weekly.sh +++ b/tests/weekly.sh @@ -70,13 +70,14 @@ mkdir -p tests/testing-results # GPU weekly tests start here # before pulling gem5 resources, make sure it doesn't exist already -docker run --rm --volume "${gem5_root}":"${gem5_root}" -w \ +docker run -u $UID:$GID --rm --volume "${gem5_root}":"${gem5_root}" -w \ "${gem5_root}" --memory="${docker_mem_limit}" \ gcr.io/gem5-test/gcn-gpu:${tag} bash -c \ "rm -rf ${gem5_root}/gem5-resources" -# delete Pannotia datasets and output files in case a failed regression run left -# them around -rm -f coAuthorsDBLP.graph 1k_128k.gr result.out + +# delete m5out, Pannotia datasets, and output files in case a failed regression +# run left them around +rm -rf ${gem5_root}/m5out coAuthorsDBLP.graph 1k_128k.gr result.out # Pull gem5 resources to the root of the gem5 directory -- currently the # pre-built binares for LULESH are out-of-date and won't run correctly with @@ -383,5 +384,8 @@ 
docker run --rm --volume "${gem5_root}":"${gem5_root}" -w \ "${gem5_root}" --memory="${docker_mem_limit}" hacc-test-weekly bash -c \ "rm -rf ${gem5_root}/gem5-resources" +# Delete the gem5 m5out folder we created +rm -rf ${gem5_root}/m5out + # delete Pannotia datasets we downloaded and output files it created rm -f coAuthorsDBLP.graph 1k_128k.gr result.out From 18ba4e12788c2e7d39d204961e95007ad8a236d7 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Thu, 2 Mar 2023 11:35:46 +0000 Subject: [PATCH 247/492] tests: Fix GCC -W(maybe-)uninitialized warnings These all look like valid (but harmless) diagnostics to me and are all simple to fix. Most of them can be fixed by using ASSERT_* variants of the GTest checkers to ensure that the remainder of the function is not executed and the uninitialized result isn't touched. Change-Id: Ib5fe2ac2ec539c880d670ebc3321ce98940c7e38 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68517 Tested-by: kokoro Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Reviewed-by: Daniel Carvalho --- src/base/circlebuf.test.cc | 2 +- src/base/str.test.cc | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/base/circlebuf.test.cc b/src/base/circlebuf.test.cc index 02fe3961d4..c7913f7a27 100644 --- a/src/base/circlebuf.test.cc +++ b/src/base/circlebuf.test.cc @@ -136,7 +136,7 @@ TEST(CircleBufTest, PointerWrapAround) TEST(CircleBufTest, ProduceConsumeEmpty) { CircleBuf buf(8); - char foo[1]; + char foo[1] = {'a'}; // buf is empty to begin with. 
EXPECT_TRUE(buf.empty()); diff --git a/src/base/str.test.cc b/src/base/str.test.cc index f999c98825..a08f984416 100644 --- a/src/base/str.test.cc +++ b/src/base/str.test.cc @@ -254,7 +254,7 @@ TEST(StrTest, ToNumber8BitInt) { int8_t output; std::string input = "-128"; - EXPECT_TRUE(to_number(input, output)); + ASSERT_TRUE(to_number(input, output)); EXPECT_EQ(-128, output); } @@ -276,7 +276,7 @@ TEST(StrTest, ToNumberUnsigned8BitInt) { uint8_t output; std::string input = "255"; - EXPECT_TRUE(to_number(input, output)); + ASSERT_TRUE(to_number(input, output)); EXPECT_EQ(255, output); } @@ -292,11 +292,11 @@ TEST(StrTest, ToNumberUnsigned8BitIntRoundDown) { uint8_t output; std::string input_1 = "2.99"; - EXPECT_TRUE(to_number(input_1, output)); + ASSERT_TRUE(to_number(input_1, output)); EXPECT_EQ(2, output); std::string input_2 = "3.99"; - EXPECT_TRUE(to_number(input_2, output)); + ASSERT_TRUE(to_number(input_2, output)); EXPECT_EQ(3, output); } @@ -308,7 +308,7 @@ TEST(StrTest, ToNumber8BitUnsignedLimit) { uint8_t output; std::string input = "255.99"; - EXPECT_TRUE(to_number(input, output)); + ASSERT_TRUE(to_number(input, output)); EXPECT_EQ(255, output); } @@ -344,7 +344,7 @@ TEST(StrTest, ToNumber64BitInt) int64_t output; int64_t input_number = 0xFFFFFFFFFFFFFFFF; std::string input = std::to_string(input_number); - EXPECT_TRUE(to_number(input, output)); + ASSERT_TRUE(to_number(input, output)); EXPECT_EQ(input_number, output); } @@ -363,7 +363,7 @@ TEST(StrTest, ToNumberEnum) }; Number output; std::string input = "2"; - EXPECT_TRUE(to_number(input, output)); + ASSERT_TRUE(to_number(input, output)); EXPECT_EQ(TWO, output); } @@ -384,7 +384,7 @@ TEST(StrTest, ToNumberFloat) float output; std::string input = "0.1"; float expected_output = 0.1; - EXPECT_TRUE(to_number(input, output)); + ASSERT_TRUE(to_number(input, output)); EXPECT_EQ(expected_output, output); } @@ -393,7 +393,7 @@ TEST(StrTest, ToNumberFloatIntegerString) float output; std::string input = "10"; float 
expected_output = 10.0; - EXPECT_TRUE(to_number(input, output)); + ASSERT_TRUE(to_number(input, output)); EXPECT_EQ(expected_output, output); } @@ -402,7 +402,7 @@ TEST(StrTest, ToNumberFloatNegative) float output; std::string input = "-0.1"; float expected_output = -0.1; - EXPECT_TRUE(to_number(input, output)); + ASSERT_TRUE(to_number(input, output)); EXPECT_EQ(expected_output, output); } @@ -411,7 +411,7 @@ TEST(StrTest, ToNumberDouble) double output; std::string input = "0.0001"; double expected_output = 0.0001; - EXPECT_TRUE(to_number(input, output)); + ASSERT_TRUE(to_number(input, output)); EXPECT_EQ(expected_output, output); } @@ -420,7 +420,7 @@ TEST(StrTest, ToNumberDoubleIntegerString) double output; std::string input = "12345"; double expected_output = 12345.0; - EXPECT_TRUE(to_number(input, output)); + ASSERT_TRUE(to_number(input, output)); EXPECT_EQ(expected_output, output); } @@ -429,7 +429,7 @@ TEST(StrTest, ToNumberDoubleNegative) double output; std::string input = "-1.2345"; double expected_output = -1.2345; - EXPECT_TRUE(to_number(input, output)); + ASSERT_TRUE(to_number(input, output)); EXPECT_EQ(expected_output, output); } @@ -439,7 +439,7 @@ TEST(StrTest, ToNumberScientific) double output; std::string input = "8.234e+08"; double expected_output = 823400000; - EXPECT_TRUE(to_number(input, output)); + ASSERT_TRUE(to_number(input, output)); EXPECT_EQ(expected_output, output); } From 85342dbb0eca5b05029bf3376a8af1e598cfd840 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Tue, 7 Mar 2023 09:21:26 -0800 Subject: [PATCH 248/492] cpu: Revert CPU stats changes This reverts this relation chain: https://gem5-review.googlesource.com/c/public/gem5/+/67396/6 This was prematurely submitted before all testing and checking was done. To be safe, this has been reverted. When all testing and checks are completed, this revert will be undone.
Change-Id: I2a88cadfee03c1fc81932e6548938db108786dd2 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68717 Reviewed-by: Jason Lowe-Power Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- src/cpu/base.cc | 250 --------------------------------- src/cpu/base.hh | 142 ------------------- src/cpu/kvm/base.cc | 5 +- src/cpu/kvm/base.hh | 1 + src/cpu/minor/execute.cc | 47 +++++-- src/cpu/minor/stats.cc | 36 ++++- src/cpu/minor/stats.hh | 22 +++ src/cpu/o3/commit.cc | 73 ++++++++-- src/cpu/o3/commit.hh | 16 +++ src/cpu/o3/cpu.cc | 167 +++++++++++++++++----- src/cpu/o3/cpu.hh | 41 +++++- src/cpu/o3/dyn_inst.hh | 14 +- src/cpu/o3/fetch.cc | 33 ++++- src/cpu/o3/fetch.hh | 10 ++ src/cpu/o3/iew.cc | 50 ++++++- src/cpu/o3/iew.hh | 14 ++ src/cpu/simple/base.cc | 67 +++------ src/cpu/simple/base.hh | 2 - src/cpu/simple/exec_context.hh | 180 +++++++++++++++++++++--- 19 files changed, 628 insertions(+), 542 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index cee76472f5..d2c0a78d44 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -191,30 +191,6 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) modelResetPort.onChange([this](const bool &new_val) { setReset(new_val); }); - // create a stat group object for each thread on this core - fetchStats.reserve(numThreads); - executeStats.reserve(numThreads); - commitStats.reserve(numThreads); - for (int i = 0; i < numThreads; i++) { - // create fetchStat object for thread i and set rate formulas - FetchCPUStats* fetchStatptr = new FetchCPUStats(this, i); - fetchStatptr->fetchRate = fetchStatptr->numInsts / baseStats.numCycles; - fetchStatptr->branchRate = fetchStatptr->numBranches / - baseStats.numCycles; - fetchStats.emplace_back(fetchStatptr); - - // create executeStat object for thread i and set rate formulas - ExecuteCPUStats* executeStatptr = new ExecuteCPUStats(this, i); - executeStatptr->instRate = executeStatptr->numInsts / - baseStats.numCycles; - 
executeStats.emplace_back(executeStatptr); - - // create commitStat object for thread i and set ipc, cpi formulas - CommitCPUStats* commitStatptr = new CommitCPUStats(this, i); - commitStatptr->ipc = commitStatptr->numInsts / baseStats.numCycles; - commitStatptr->cpi = baseStats.numCycles / commitStatptr->numInsts; - commitStats.emplace_back(commitStatptr); - } } void @@ -407,28 +383,13 @@ BaseCPU::probeInstCommit(const StaticInstPtr &inst, Addr pc) BaseCPU:: BaseCPUStats::BaseCPUStats(statistics::Group *parent) : statistics::Group(parent), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of instructions committed (core level)"), - ADD_STAT(numOps, statistics::units::Count::get(), - "Number of ops (including micro ops) committed (core level)"), ADD_STAT(numCycles, statistics::units::Cycle::get(), "Number of cpu cycles simulated"), - ADD_STAT(cpi, statistics::units::Rate< - statistics::units::Cycle, statistics::units::Count>::get(), - "CPI: cycles per instruction (core level)"), - ADD_STAT(ipc, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "IPC: instructions per cycle (core level)"), ADD_STAT(numWorkItemsStarted, statistics::units::Count::get(), "Number of work items this cpu started"), ADD_STAT(numWorkItemsCompleted, statistics::units::Count::get(), "Number of work items this cpu completed") { - cpi.precision(6); - cpi = numCycles / numInsts; - - ipc.precision(6); - ipc = numInsts / numCycles; } void @@ -866,215 +827,4 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent) hostOpRate = simOps / hostSeconds; } -BaseCPU:: -FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id) - : statistics::Group(parent, csprintf("fetchStats%i", thread_id).c_str()), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of instructions fetched (thread level)"), - ADD_STAT(numOps, statistics::units::Count::get(), - "Number of ops (including micro ops) fetched (thread level)"), - 
ADD_STAT(fetchRate, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "Number of inst fetches per cycle"), - ADD_STAT(numBranches, statistics::units::Count::get(), - "Number of branches fetched"), - ADD_STAT(branchRate, statistics::units::Ratio::get(), - "Number of branch fetches per cycle"), - ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(), - "ICache total stall cycles"), - ADD_STAT(numFetchSuspends, statistics::units::Count::get(), - "Number of times Execute suspended instruction fetching") - -{ - fetchRate - .flags(statistics::total); - - numBranches - .prereq(numBranches); - - branchRate - .flags(statistics::total); - - icacheStallCycles - .prereq(icacheStallCycles); - -} - -// means it is incremented in a vector indexing and not directly -BaseCPU:: -ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id) - : statistics::Group(parent, csprintf("executeStats%i", thread_id).c_str()), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of executed instructions"), - ADD_STAT(numNop, statistics::units::Count::get(), - "Number of nop insts executed"), - ADD_STAT(numBranches, statistics::units::Count::get(), - "Number of branches executed"), - ADD_STAT(numLoadInsts, statistics::units::Count::get(), - "Number of load instructions executed"), - ADD_STAT(numStoreInsts, statistics::units::Count::get(), - "Number of stores executed"), - ADD_STAT(instRate, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "Inst execution rate"), - ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(), - "DCache total stall cycles"), - ADD_STAT(numCCRegReads, statistics::units::Count::get(), - "Number of times the CC registers were read"), - ADD_STAT(numCCRegWrites, statistics::units::Count::get(), - "Number of times the CC registers were written"), - ADD_STAT(numFpAluAccesses, statistics::units::Count::get(), - "Number of float alu accesses"), - 
ADD_STAT(numFpRegReads, statistics::units::Count::get(), - "Number of times the floating registers were read"), - ADD_STAT(numFpRegWrites, statistics::units::Count::get(), - "Number of times the floating registers were written"), - ADD_STAT(numIntAluAccesses, statistics::units::Count::get(), - "Number of integer alu accesses"), - ADD_STAT(numIntRegReads, statistics::units::Count::get(), - "Number of times the integer registers were read"), - ADD_STAT(numIntRegWrites, statistics::units::Count::get(), - "Number of times the integer registers were written"), - ADD_STAT(numMemRefs, statistics::units::Count::get(), - "Number of memory refs"), - ADD_STAT(numMiscRegReads, statistics::units::Count::get(), - "Number of times the Misc registers were read"), - ADD_STAT(numMiscRegWrites, statistics::units::Count::get(), - "Number of times the Misc registers were written"), - ADD_STAT(numVecAluAccesses, statistics::units::Count::get(), - "Number of vector alu accesses"), - ADD_STAT(numVecPredRegReads, statistics::units::Count::get(), - "Number of times the predicate registers were read"), - ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(), - "Number of times the predicate registers were written"), - ADD_STAT(numVecRegReads, statistics::units::Count::get(), - "Number of times the vector registers were read"), - ADD_STAT(numVecRegWrites, statistics::units::Count::get(), - "Number of times the vector registers were written"), - ADD_STAT(numDiscardedOps, statistics::units::Count::get(), - "Number of ops (including micro ops) which were discarded before " - "commit") -{ - numStoreInsts = numMemRefs - numLoadInsts; - - dcacheStallCycles - .prereq(dcacheStallCycles); - numCCRegReads - .prereq(numCCRegReads) - .flags(statistics::nozero); - numCCRegWrites - .prereq(numCCRegWrites) - .flags(statistics::nozero); - numFpAluAccesses - .prereq(numFpAluAccesses); - numFpRegReads - .prereq(numFpRegReads); - numIntAluAccesses - .prereq(numIntAluAccesses); - numIntRegReads - 
.prereq(numIntRegReads); - numIntRegWrites - .prereq(numIntRegWrites); - numMiscRegReads - .prereq(numMiscRegReads); - numMiscRegWrites - .prereq(numMiscRegWrites); - numVecPredRegReads - .prereq(numVecPredRegReads); - numVecPredRegWrites - .prereq(numVecPredRegWrites); - numVecRegReads - .prereq(numVecRegReads); - numVecRegWrites - .prereq(numVecRegWrites); -} - -BaseCPU:: -CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id) - : statistics::Group(parent, csprintf("commitStats%i", thread_id).c_str()), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of instructions committed (thread level)"), - ADD_STAT(numOps, statistics::units::Count::get(), - "Number of ops (including micro ops) committed (thread level)"), - ADD_STAT(numInstsNotNOP, statistics::units::Count::get(), - "Number of instructions committed excluding NOPs or prefetches"), - ADD_STAT(numOpsNotNOP, statistics::units::Count::get(), - "Number of Ops (including micro ops) Simulated"), - ADD_STAT(cpi, statistics::units::Rate< - statistics::units::Cycle, statistics::units::Count>::get(), - "CPI: cycles per instruction (thread level)"), - ADD_STAT(ipc, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "IPC: instructions per cycle (thread level)"), - ADD_STAT(numMemRefs, statistics::units::Count::get(), - "Number of memory references committed"), - ADD_STAT(numFpInsts, statistics::units::Count::get(), - "Number of float instructions"), - ADD_STAT(numIntInsts, statistics::units::Count::get(), - "Number of integer instructions"), - ADD_STAT(numLoadInsts, statistics::units::Count::get(), - "Number of load instructions"), - ADD_STAT(numStoreInsts, statistics::units::Count::get(), - "Number of store instructions"), - ADD_STAT(numVecInsts, statistics::units::Count::get(), - "Number of vector instructions"), - ADD_STAT(committedInstType, statistics::units::Count::get(), - "Class of committed instruction."), - ADD_STAT(committedControl, 
statistics::units::Count::get(), - "Class of control type instructions committed") -{ - numInsts - .prereq(numInsts); - - cpi.precision(6); - ipc.precision(6); - - committedInstType - .init(enums::Num_OpClass) - .flags(statistics::total | statistics::pdf | statistics::dist); - - for (unsigned i = 0; i < Num_OpClasses; ++i) { - committedInstType.subname(i, enums::OpClassStrings[i]); - } - - committedControl - .init(StaticInstFlags::Flags::Num_Flags) - .flags(statistics::nozero); - - for (unsigned i = 0; i < StaticInstFlags::Flags::Num_Flags; i++) { - committedControl.subname(i, StaticInstFlags::FlagsStrings[i]); - } -} - - -void -BaseCPU:: -CommitCPUStats::updateComCtrlStats(const StaticInstPtr staticInst) -{ - /* Add a count for every control instruction type */ - if (staticInst->isControl()) { - if (staticInst->isReturn()) { - committedControl[gem5::StaticInstFlags::Flags::IsReturn]++; - } - if (staticInst->isCall()) { - committedControl[gem5::StaticInstFlags::Flags::IsCall]++; - } - if (staticInst->isDirectCtrl()) { - committedControl[gem5::StaticInstFlags::Flags::IsDirectControl]++; - } - if (staticInst->isIndirectCtrl()) { - committedControl - [gem5::StaticInstFlags::Flags::IsIndirectControl]++; - } - if (staticInst->isCondCtrl()) { - committedControl[gem5::StaticInstFlags::Flags::IsCondControl]++; - } - if (staticInst->isUncondCtrl()) { - committedControl[gem5::StaticInstFlags::Flags::IsUncondControl]++; - } - committedControl[gem5::StaticInstFlags::Flags::IsControl]++; - } - -} - } // namespace gem5 diff --git a/src/cpu/base.hh b/src/cpu/base.hh index fc22abc5aa..084d9b9305 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -43,7 +43,6 @@ #define __CPU_BASE_HH__ #include -#include #include "arch/generic/interrupts.hh" #include "base/statistics.hh" @@ -633,14 +632,8 @@ class BaseCPU : public ClockedObject struct BaseCPUStats : public statistics::Group { BaseCPUStats(statistics::Group *parent); - // Number of CPU insts and ops committed at CPU core level - 
statistics::Scalar numInsts; - statistics::Scalar numOps; // Number of CPU cycles simulated statistics::Scalar numCycles; - /* CPI/IPC for total cycle counts and macro insts */ - statistics::Formula cpi; - statistics::Formula ipc; statistics::Scalar numWorkItemsStarted; statistics::Scalar numWorkItemsCompleted; } baseStats; @@ -683,141 +676,6 @@ class BaseCPU : public ClockedObject const Cycles pwrGatingLatency; const bool powerGatingOnIdle; EventFunctionWrapper enterPwrGatingEvent; - - public: - struct FetchCPUStats : public statistics::Group - { - FetchCPUStats(statistics::Group *parent, int thread_id); - - /* Total number of instructions fetched */ - statistics::Scalar numInsts; - - /* Total number of operations fetched */ - statistics::Scalar numOps; - - /* Number of instruction fetched per cycle. */ - statistics::Formula fetchRate; - - /* Total number of branches fetched */ - statistics::Scalar numBranches; - - /* Number of branch fetches per cycle. */ - statistics::Formula branchRate; - - /* Number of cycles stalled due to an icache miss */ - statistics::Scalar icacheStallCycles; - - /* Number of times fetch was asked to suspend by Execute */ - statistics::Scalar numFetchSuspends; - - }; - - struct ExecuteCPUStats: public statistics::Group - { - ExecuteCPUStats(statistics::Group *parent, int thread_id); - - /* Stat for total number of executed instructions */ - statistics::Scalar numInsts; - /* Number of executed nops */ - statistics::Scalar numNop; - /* Number of executed branches */ - statistics::Scalar numBranches; - /* Stat for total number of executed load instructions */ - statistics::Scalar numLoadInsts; - /* Number of executed store instructions */ - statistics::Formula numStoreInsts; - /* Number of instructions executed per cycle */ - statistics::Formula instRate; - - /* Number of cycles stalled for D-cache responses */ - statistics::Scalar dcacheStallCycles; - - /* Number of condition code register file accesses */ - statistics::Scalar 
numCCRegReads; - statistics::Scalar numCCRegWrites; - - /* number of float alu accesses */ - statistics::Scalar numFpAluAccesses; - - /* Number of float register file accesses */ - statistics::Scalar numFpRegReads; - statistics::Scalar numFpRegWrites; - - /* Number of integer alu accesses */ - statistics::Scalar numIntAluAccesses; - - /* Number of integer register file accesses */ - statistics::Scalar numIntRegReads; - statistics::Scalar numIntRegWrites; - - /* number of simulated memory references */ - statistics::Scalar numMemRefs; - - /* Number of misc register file accesses */ - statistics::Scalar numMiscRegReads; - statistics::Scalar numMiscRegWrites; - - /* Number of vector alu accesses */ - statistics::Scalar numVecAluAccesses; - - /* Number of predicate register file accesses */ - mutable statistics::Scalar numVecPredRegReads; - statistics::Scalar numVecPredRegWrites; - - /* Number of vector register file accesses */ - mutable statistics::Scalar numVecRegReads; - statistics::Scalar numVecRegWrites; - - /* Number of ops discarded before committing */ - statistics::Scalar numDiscardedOps; - }; - - struct CommitCPUStats: public statistics::Group - { - CommitCPUStats(statistics::Group *parent, int thread_id); - - /* Number of simulated instructions committed */ - statistics::Scalar numInsts; - statistics::Scalar numOps; - - /* Number of instructions committed that are not NOP or prefetches */ - statistics::Scalar numInstsNotNOP; - statistics::Scalar numOpsNotNOP; - - /* CPI/IPC for total cycle counts and macro insts */ - statistics::Formula cpi; - statistics::Formula ipc; - - /* Number of committed memory references. 
*/ - statistics::Scalar numMemRefs; - - /* Number of float instructions */ - statistics::Scalar numFpInsts; - - /* Number of int instructions */ - statistics::Scalar numIntInsts; - - /* number of load instructions */ - statistics::Scalar numLoadInsts; - - /* Number of store instructions */ - statistics::Scalar numStoreInsts; - - /* Number of vector instructions */ - statistics::Scalar numVecInsts; - - /* Number of instructions committed by type (OpClass) */ - statistics::Vector committedInstType; - - /* number of control instructions committed by control inst type */ - statistics::Vector committedControl; - void updateComCtrlStats(const StaticInstPtr staticInst); - - }; - - std::vector> fetchStats; - std::vector> executeStats; - std::vector> commitStats; }; } // namespace gem5 diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc index e22e1628d2..b76bddc2fd 100644 --- a/src/cpu/kvm/base.cc +++ b/src/cpu/kvm/base.cc @@ -261,6 +261,8 @@ BaseKvmCPU::restartEqThread() BaseKvmCPU::StatGroup::StatGroup(statistics::Group *parent) : statistics::Group(parent), + ADD_STAT(committedInsts, statistics::units::Count::get(), + "Number of instructions committed"), ADD_STAT(numVMExits, statistics::units::Count::get(), "total number of KVM exits"), ADD_STAT(numVMHalfEntries, statistics::units::Count::get(), @@ -776,8 +778,7 @@ BaseKvmCPU::kvmRun(Tick ticks) /* Update statistics */ baseStats.numCycles += simCyclesExecuted;; - commitStats[thread->threadId()]->numInsts += instsExecuted; - baseStats.numInsts += instsExecuted; + stats.committedInsts += instsExecuted; ctrInsts += instsExecuted; DPRINTF(KvmRun, diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh index 7bbf393f9b..2d81c7c7eb 100644 --- a/src/cpu/kvm/base.hh +++ b/src/cpu/kvm/base.hh @@ -804,6 +804,7 @@ class BaseKvmCPU : public BaseCPU struct StatGroup : public statistics::Group { StatGroup(statistics::Group *parent); + statistics::Scalar committedInsts; statistics::Scalar numVMExits; statistics::Scalar 
numVMHalfEntries; statistics::Scalar numExitSignal; diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index 2908c2266f..5eaaf5804e 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -871,18 +871,49 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst) { thread->numInst++; thread->threadStats.numInsts++; - cpu.commitStats[inst->id.threadId]->numInsts++; - cpu.baseStats.numInsts++; + cpu.stats.numInsts++; /* Act on events related to instruction counts */ thread->comInstEventQueue.serviceEvents(thread->numInst); } thread->numOp++; thread->threadStats.numOps++; - cpu.commitStats[inst->id.threadId]->numOps++; - cpu.baseStats.numOps++; - cpu.commitStats[inst->id.threadId] - ->committedInstType[inst->staticInst->opClass()]++; + cpu.stats.numOps++; + cpu.stats.committedInstType[inst->id.threadId] + [inst->staticInst->opClass()]++; + + /** Add a count for every control instruction */ + if (inst->staticInst->isControl()) { + if (inst->staticInst->isReturn()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsReturn]++; + } + if (inst->staticInst->isCall()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsCall]++; + } + if (inst->staticInst->isDirectCtrl()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsDirectControl]++; + } + if (inst->staticInst->isIndirectCtrl()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsIndirectControl]++; + } + if (inst->staticInst->isCondCtrl()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsCondControl]++; + } + if (inst->staticInst->isUncondCtrl()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsUncondControl]++; + + } + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsControl]++; + } + + /* Set the CP SeqNum to the numOps commit number */ if (inst->traceData) 
@@ -1023,7 +1054,7 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue, DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute" " inst: %s\n", thread_id, *inst); - cpu.fetchStats[thread_id]->numFetchSuspends++; + cpu.stats.numFetchSuspends++; updateBranchData(thread_id, BranchData::SuspendThread, inst, resume_pc, branch); @@ -1337,7 +1368,7 @@ Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard, *inst, ex_info.streamSeqNum); if (fault == NoFault) - cpu.executeStats[thread_id]->numDiscardedOps++; + cpu.stats.numDiscardedOps++; } /* Mark the mem inst as being in the LSQ */ diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc index e31cbe93a1..64d4c475e0 100644 --- a/src/cpu/minor/stats.cc +++ b/src/cpu/minor/stats.cc @@ -45,13 +45,47 @@ namespace minor MinorStats::MinorStats(BaseCPU *base_cpu) : statistics::Group(base_cpu), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions committed"), + ADD_STAT(numOps, statistics::units::Count::get(), + "Number of ops (including micro ops) committed"), + ADD_STAT(numDiscardedOps, statistics::units::Count::get(), + "Number of ops (including micro ops) which were discarded before " + "commit"), + ADD_STAT(numFetchSuspends, statistics::units::Count::get(), + "Number of times Execute suspended instruction fetching"), ADD_STAT(quiesceCycles, statistics::units::Cycle::get(), "Total number of cycles that CPU has spent quiesced or waiting " - "for an interrupt") + "for an interrupt"), + ADD_STAT(cpi, statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: cycles per instruction"), + ADD_STAT(ipc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: instructions per cycle"), + ADD_STAT(committedInstType, statistics::units::Count::get(), + "Class of committed instruction"), + ADD_STAT(committedControl, statistics::units::Count::get(), + "Class of control type instructions 
committed") { quiesceCycles.prereq(quiesceCycles); + cpi.precision(6); + cpi = base_cpu->baseStats.numCycles / numInsts; + + ipc.precision(6); + ipc = numInsts / base_cpu->baseStats.numCycles; + + committedInstType + .init(base_cpu->numThreads, enums::Num_OpClass) + .flags(statistics::total | statistics::pdf | statistics::dist); + committedInstType.ysubnames(enums::OpClassStrings); + + committedControl + .init(base_cpu->numThreads, StaticInstFlags::Flags::Num_Flags) + .flags(statistics::nozero); + committedControl.ysubnames(StaticInstFlags::FlagsStrings); } } // namespace minor diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh index 98ac80f15c..1ab81f4407 100644 --- a/src/cpu/minor/stats.hh +++ b/src/cpu/minor/stats.hh @@ -59,9 +59,31 @@ struct MinorStats : public statistics::Group { MinorStats(BaseCPU *parent); + /** Number of simulated instructions */ + statistics::Scalar numInsts; + + /** Number of simulated insts and microops */ + statistics::Scalar numOps; + + /** Number of ops discarded before committing */ + statistics::Scalar numDiscardedOps; + + /** Number of times fetch was asked to suspend by Execute */ + statistics::Scalar numFetchSuspends; + /** Number of cycles in quiescent state */ statistics::Scalar quiesceCycles; + /** CPI/IPC for total cycle counts and macro insts */ + statistics::Formula cpi; + statistics::Formula ipc; + + /** Number of instructions by type (OpClass) */ + statistics::Vector2d committedInstType; + + /** Number of branches commited */ + statistics::Vector2d committedControl; + }; } // namespace minor diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc index e1f01680ca..38dce831b1 100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@ -156,10 +156,25 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit) "The number of times a branch was mispredicted"), ADD_STAT(numCommittedDist, statistics::units::Count::get(), "Number of insts commited each cycle"), + ADD_STAT(instsCommitted, 
statistics::units::Count::get(), + "Number of instructions committed"), + ADD_STAT(opsCommitted, statistics::units::Count::get(), + "Number of ops (including micro ops) committed"), + ADD_STAT(memRefs, statistics::units::Count::get(), + "Number of memory references committed"), + ADD_STAT(loads, statistics::units::Count::get(), "Number of loads committed"), ADD_STAT(amos, statistics::units::Count::get(), "Number of atomic instructions committed"), ADD_STAT(membars, statistics::units::Count::get(), "Number of memory barriers committed"), + ADD_STAT(branches, statistics::units::Count::get(), + "Number of branches committed"), + ADD_STAT(vectorInstructions, statistics::units::Count::get(), + "Number of committed Vector instructions."), + ADD_STAT(floating, statistics::units::Count::get(), + "Number of committed floating point instructions."), + ADD_STAT(integer, statistics::units::Count::get(), + "Number of committed integer instructions."), ADD_STAT(functionCalls, statistics::units::Count::get(), "Number of function calls committed."), ADD_STAT(committedInstType, statistics::units::Count::get(), @@ -177,6 +192,22 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit) .init(0,commit->commitWidth,1) .flags(statistics::pdf); + instsCommitted + .init(cpu->numThreads) + .flags(total); + + opsCommitted + .init(cpu->numThreads) + .flags(total); + + memRefs + .init(cpu->numThreads) + .flags(total); + + loads + .init(cpu->numThreads) + .flags(total); + amos .init(cpu->numThreads) .flags(total); @@ -185,6 +216,22 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit) .init(cpu->numThreads) .flags(total); + branches + .init(cpu->numThreads) + .flags(total); + + vectorInstructions + .init(cpu->numThreads) + .flags(total); + + floating + .init(cpu->numThreads) + .flags(total); + + integer + .init(cpu->numThreads) + .flags(total); + functionCalls .init(commit->numThreads) .flags(total); @@ -1336,12 +1383,9 @@ Commit::updateComInstStats(const DynInstPtr &inst) { 
ThreadID tid = inst->threadNumber; - if (!inst->isMicroop() || inst->isLastMicroop()) { - cpu->commitStats[tid]->numInsts++; - cpu->baseStats.numInsts++; - } - cpu->commitStats[tid]->numOps++; - cpu->baseStats.numOps++; + if (!inst->isMicroop() || inst->isLastMicroop()) + stats.instsCommitted[tid]++; + stats.opsCommitted[tid]++; // To match the old model, don't count nops and instruction // prefetches towards the total commit count. @@ -1352,20 +1396,21 @@ Commit::updateComInstStats(const DynInstPtr &inst) // // Control Instructions // - cpu->commitStats[tid]->updateComCtrlStats(inst->staticInst); + if (inst->isControl()) + stats.branches[tid]++; // // Memory references // if (inst->isMemRef()) { - cpu->commitStats[tid]->numMemRefs++; + stats.memRefs[tid]++; if (inst->isLoad()) { - cpu->commitStats[tid]->numLoadInsts++; + stats.loads[tid]++; } - if (inst->isStore()) { - cpu->commitStats[tid]->numStoreInsts++; + if (inst->isAtomic()) { + stats.amos[tid]++; } } @@ -1375,14 +1420,14 @@ Commit::updateComInstStats(const DynInstPtr &inst) // Integer Instruction if (inst->isInteger()) - cpu->commitStats[tid]->numIntInsts++; + stats.integer[tid]++; // Floating Point Instruction if (inst->isFloating()) - cpu->commitStats[tid]->numFpInsts++; + stats.floating[tid]++; // Vector Instruction if (inst->isVector()) - cpu->commitStats[tid]->numVecInsts++; + stats.vectorInstructions[tid]++; // Function Calls if (inst->isCall()) diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index eccd023d45..cf4eaf5d92 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -479,10 +479,26 @@ class Commit /** Distribution of the number of committed instructions each cycle. */ statistics::Distribution numCommittedDist; + /** Total number of instructions committed. */ + statistics::Vector instsCommitted; + /** Total number of ops (including micro ops) committed. */ + statistics::Vector opsCommitted; + /** Stat for the total number of committed memory references. 
*/ + statistics::Vector memRefs; + /** Stat for the total number of committed loads. */ + statistics::Vector loads; /** Stat for the total number of committed atomics. */ statistics::Vector amos; /** Total number of committed memory barriers. */ statistics::Vector membars; + /** Total number of committed branches. */ + statistics::Vector branches; + /** Total number of vector instructions */ + statistics::Vector vectorInstructions; + /** Total number of floating point instructions */ + statistics::Vector floating; + /** Total number of integer instructions */ + statistics::Vector integer; /** Total number of function calls */ statistics::Vector functionCalls; /** Committed instructions by instruction type (OpClass) */ diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 93c58fef63..d2bacaa523 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -328,7 +328,47 @@ CPU::CPUStats::CPUStats(CPU *cpu) "to idling"), ADD_STAT(quiesceCycles, statistics::units::Cycle::get(), "Total number of cycles that CPU has spent quiesced or waiting " - "for an interrupt") + "for an interrupt"), + ADD_STAT(committedInsts, statistics::units::Count::get(), + "Number of Instructions Simulated"), + ADD_STAT(committedOps, statistics::units::Count::get(), + "Number of Ops (including micro ops) Simulated"), + ADD_STAT(cpi, statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: Cycles Per Instruction"), + ADD_STAT(totalCpi, statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: Total CPI of All Threads"), + ADD_STAT(ipc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: Instructions Per Cycle"), + ADD_STAT(totalIpc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: Total IPC of All Threads"), + ADD_STAT(intRegfileReads, statistics::units::Count::get(), + "Number of integer regfile reads"), + ADD_STAT(intRegfileWrites, 
statistics::units::Count::get(), + "Number of integer regfile writes"), + ADD_STAT(fpRegfileReads, statistics::units::Count::get(), + "Number of floating regfile reads"), + ADD_STAT(fpRegfileWrites, statistics::units::Count::get(), + "Number of floating regfile writes"), + ADD_STAT(vecRegfileReads, statistics::units::Count::get(), + "number of vector regfile reads"), + ADD_STAT(vecRegfileWrites, statistics::units::Count::get(), + "number of vector regfile writes"), + ADD_STAT(vecPredRegfileReads, statistics::units::Count::get(), + "number of predicate regfile reads"), + ADD_STAT(vecPredRegfileWrites, statistics::units::Count::get(), + "number of predicate regfile writes"), + ADD_STAT(ccRegfileReads, statistics::units::Count::get(), + "number of cc regfile reads"), + ADD_STAT(ccRegfileWrites, statistics::units::Count::get(), + "number of cc regfile writes"), + ADD_STAT(miscRegfileReads, statistics::units::Count::get(), + "number of misc regfile reads"), + ADD_STAT(miscRegfileWrites, statistics::units::Count::get(), + "number of misc regfile writes") { // Register any of the O3CPU's stats here. 
timesIdled @@ -340,6 +380,69 @@ CPU::CPUStats::CPUStats(CPU *cpu) quiesceCycles .prereq(quiesceCycles); + // Number of Instructions simulated + // -------------------------------- + // Should probably be in Base CPU but need templated + // MaxThreads so put in here instead + committedInsts + .init(cpu->numThreads) + .flags(statistics::total); + + committedOps + .init(cpu->numThreads) + .flags(statistics::total); + + cpi + .precision(6); + cpi = cpu->baseStats.numCycles / committedInsts; + + totalCpi + .precision(6); + totalCpi = cpu->baseStats.numCycles / sum(committedInsts); + + ipc + .precision(6); + ipc = committedInsts / cpu->baseStats.numCycles; + + totalIpc + .precision(6); + totalIpc = sum(committedInsts) / cpu->baseStats.numCycles; + + intRegfileReads + .prereq(intRegfileReads); + + intRegfileWrites + .prereq(intRegfileWrites); + + fpRegfileReads + .prereq(fpRegfileReads); + + fpRegfileWrites + .prereq(fpRegfileWrites); + + vecRegfileReads + .prereq(vecRegfileReads); + + vecRegfileWrites + .prereq(vecRegfileWrites); + + vecPredRegfileReads + .prereq(vecPredRegfileReads); + + vecPredRegfileWrites + .prereq(vecPredRegfileWrites); + + ccRegfileReads + .prereq(ccRegfileReads); + + ccRegfileWrites + .prereq(ccRegfileWrites); + + miscRegfileReads + .prereq(miscRegfileReads); + + miscRegfileWrites + .prereq(miscRegfileWrites); } void @@ -916,7 +1019,7 @@ CPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) const RegVal CPU::readMiscReg(int misc_reg, ThreadID tid) { - executeStats[tid]->numMiscRegReads++; + cpuStats.miscRegfileReads++; return isa[tid]->readMiscReg(misc_reg); } @@ -929,29 +1032,29 @@ CPU::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid) void CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid) { - executeStats[tid]->numMiscRegWrites++; + cpuStats.miscRegfileWrites++; isa[tid]->setMiscReg(misc_reg, val); } RegVal -CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid) +CPU::getReg(PhysRegIdPtr phys_reg) { switch (phys_reg->classValue()) { case 
IntRegClass: - executeStats[tid]->numIntRegReads++; + cpuStats.intRegfileReads++; break; case FloatRegClass: - executeStats[tid]->numFpRegReads++; + cpuStats.fpRegfileReads++; break; case CCRegClass: - executeStats[tid]->numCCRegReads++; + cpuStats.ccRegfileReads++; break; case VecRegClass: case VecElemClass: - executeStats[tid]->numVecRegReads++; + cpuStats.vecRegfileReads++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegReads++; + cpuStats.vecPredRegfileReads++; break; default: break; @@ -960,24 +1063,24 @@ CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid) } void -CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid) +CPU::getReg(PhysRegIdPtr phys_reg, void *val) { switch (phys_reg->classValue()) { case IntRegClass: - executeStats[tid]->numIntRegReads++; + cpuStats.intRegfileReads++; break; case FloatRegClass: - executeStats[tid]->numFpRegReads++; + cpuStats.fpRegfileReads++; break; case CCRegClass: - executeStats[tid]->numCCRegReads++; + cpuStats.ccRegfileReads++; break; case VecRegClass: case VecElemClass: - executeStats[tid]->numVecRegReads++; + cpuStats.vecRegfileReads++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegReads++; + cpuStats.vecPredRegfileReads++; break; default: break; @@ -986,14 +1089,14 @@ CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid) } void * -CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid) +CPU::getWritableReg(PhysRegIdPtr phys_reg) { switch (phys_reg->classValue()) { case VecRegClass: - executeStats[tid]->numVecRegReads++; + cpuStats.vecRegfileReads++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegReads++; + cpuStats.vecPredRegfileReads++; break; default: break; @@ -1002,24 +1105,24 @@ CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid) } void -CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid) +CPU::setReg(PhysRegIdPtr phys_reg, RegVal val) { switch (phys_reg->classValue()) { case IntRegClass: - executeStats[tid]->numIntRegWrites++; + 
cpuStats.intRegfileWrites++; break; case FloatRegClass: - executeStats[tid]->numFpRegWrites++; + cpuStats.fpRegfileWrites++; break; case CCRegClass: - executeStats[tid]->numCCRegWrites++; + cpuStats.ccRegfileWrites++; break; case VecRegClass: case VecElemClass: - executeStats[tid]->numVecRegWrites++; + cpuStats.vecRegfileWrites++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegWrites++; + cpuStats.vecPredRegfileWrites++; break; default: break; @@ -1028,24 +1131,24 @@ CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid) } void -CPU::setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid) +CPU::setReg(PhysRegIdPtr phys_reg, const void *val) { switch (phys_reg->classValue()) { case IntRegClass: - executeStats[tid]->numIntRegWrites++; + cpuStats.intRegfileWrites++; break; case FloatRegClass: - executeStats[tid]->numFpRegWrites++; + cpuStats.fpRegfileWrites++; break; case CCRegClass: - executeStats[tid]->numCCRegWrites++; + cpuStats.ccRegfileWrites++; break; case VecRegClass: case VecElemClass: - executeStats[tid]->numVecRegWrites++; + cpuStats.vecRegfileWrites++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegWrites++; + cpuStats.vecPredRegfileWrites++; break; default: break; @@ -1127,14 +1230,14 @@ CPU::instDone(ThreadID tid, const DynInstPtr &inst) if (!inst->isMicroop() || inst->isLastMicroop()) { thread[tid]->numInst++; thread[tid]->threadStats.numInsts++; - commitStats[tid]->numInstsNotNOP++; + cpuStats.committedInsts[tid]++; // Check for instruction-count-based events. 
thread[tid]->comInstEventQueue.serviceEvents(thread[tid]->numInst); } thread[tid]->numOp++; thread[tid]->threadStats.numOps++; - commitStats[tid]->numOpsNotNOP++; + cpuStats.committedOps[tid]++; probeInstCommit(inst->staticInst, inst->pcState().instAddr()); } diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 7dc378428b..08a1312e73 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -310,12 +310,12 @@ class CPU : public BaseCPU */ void setMiscReg(int misc_reg, RegVal val, ThreadID tid); - RegVal getReg(PhysRegIdPtr phys_reg, ThreadID tid); - void getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid); - void *getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid); + RegVal getReg(PhysRegIdPtr phys_reg); + void getReg(PhysRegIdPtr phys_reg, void *val); + void *getWritableReg(PhysRegIdPtr phys_reg); - void setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid); - void setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid); + void setReg(PhysRegIdPtr phys_reg, RegVal val); + void setReg(PhysRegIdPtr phys_reg, const void *val); /** Architectural register accessors. Looks up in the commit * rename table to obtain the true physical index of the @@ -581,7 +581,38 @@ class CPU : public BaseCPU /** Stat for total number of cycles the CPU spends descheduled due to a * quiesce operation or waiting for an interrupt. */ statistics::Scalar quiesceCycles; + /** Stat for the number of committed instructions per thread. */ + statistics::Vector committedInsts; + /** Stat for the number of committed ops (including micro ops) per + * thread. */ + statistics::Vector committedOps; + /** Stat for the CPI per thread. */ + statistics::Formula cpi; + /** Stat for the total CPI. */ + statistics::Formula totalCpi; + /** Stat for the IPC per thread. */ + statistics::Formula ipc; + /** Stat for the total IPC. 
*/ + statistics::Formula totalIpc; + //number of integer register file accesses + statistics::Scalar intRegfileReads; + statistics::Scalar intRegfileWrites; + //number of float register file accesses + statistics::Scalar fpRegfileReads; + statistics::Scalar fpRegfileWrites; + //number of vector register file accesses + mutable statistics::Scalar vecRegfileReads; + statistics::Scalar vecRegfileWrites; + //number of predicate register file accesses + mutable statistics::Scalar vecPredRegfileReads; + statistics::Scalar vecPredRegfileWrites; + //number of CC register file accesses + statistics::Scalar ccRegfileReads; + statistics::Scalar ccRegfileWrites; + //number of misc + statistics::Scalar miscRegfileReads; + statistics::Scalar miscRegfileWrites; } cpuStats; public: diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index c759c5eb38..54c0385374 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -1086,10 +1086,10 @@ class DynInst : public ExecContext, public RefCounted if (bytes == sizeof(RegVal)) { setRegOperand(staticInst.get(), idx, - cpu->getReg(prev_phys_reg, threadNumber)); + cpu->getReg(prev_phys_reg)); } else { uint8_t val[original_dest_reg.regClass().regBytes()]; - cpu->getReg(prev_phys_reg, val, threadNumber); + cpu->getReg(prev_phys_reg, val); setRegOperand(staticInst.get(), idx, val); } } @@ -1116,7 +1116,7 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedSrcIdx(idx); if (reg->is(InvalidRegClass)) return 0; - return cpu->getReg(reg, threadNumber); + return cpu->getReg(reg); } void @@ -1125,13 +1125,13 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedSrcIdx(idx); if (reg->is(InvalidRegClass)) return; - cpu->getReg(reg, val, threadNumber); + cpu->getReg(reg, val); } void * getWritableRegOperand(const StaticInst *si, int idx) override { - return cpu->getWritableReg(renamedDestIdx(idx), threadNumber); + return cpu->getWritableReg(renamedDestIdx(idx)); } 
/** @todo: Make results into arrays so they can handle multiple dest @@ -1143,7 +1143,7 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedDestIdx(idx); if (reg->is(InvalidRegClass)) return; - cpu->setReg(reg, val, threadNumber); + cpu->setReg(reg, val); setResult(reg->regClass(), val); } @@ -1153,7 +1153,7 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedDestIdx(idx); if (reg->is(InvalidRegClass)) return; - cpu->setReg(reg, val, threadNumber); + cpu->setReg(reg, val); setResult(reg->regClass(), val); } }; diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index f5fc6c62ec..d3cdd2c761 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -158,6 +158,12 @@ Fetch::regProbePoints() Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch) : statistics::Group(cpu, "fetch"), + ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(), + "Number of cycles fetch is stalled on an Icache miss"), + ADD_STAT(insts, statistics::units::Count::get(), + "Number of instructions fetch has processed"), + ADD_STAT(branches, statistics::units::Count::get(), + "Number of branches that fetch encountered"), ADD_STAT(predictedBranches, statistics::units::Count::get(), "Number of branches that fetch has predicted taken"), ADD_STAT(cycles, statistics::units::Cycle::get(), @@ -194,8 +200,21 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch) "Number of instructions fetched each cycle (Total)"), ADD_STAT(idleRate, statistics::units::Ratio::get(), "Ratio of cycles fetch was idle", - idleCycles / cpu->baseStats.numCycles) + idleCycles / cpu->baseStats.numCycles), + ADD_STAT(branchRate, statistics::units::Ratio::get(), + "Number of branch fetches per cycle", + branches / cpu->baseStats.numCycles), + ADD_STAT(rate, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "Number of inst fetches per cycle", + insts / cpu->baseStats.numCycles) { + 
icacheStallCycles + .prereq(icacheStallCycles); + insts + .prereq(insts); + branches + .prereq(branches); predictedBranches .prereq(predictedBranches); cycles @@ -233,6 +252,10 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch) .flags(statistics::pdf); idleRate .prereq(idleRate); + branchRate + .flags(statistics::total); + rate + .flags(statistics::total); } void Fetch::setTimeBuffer(TimeBuffer *time_buffer) @@ -517,7 +540,7 @@ Fetch::lookupAndUpdateNextPC(const DynInstPtr &inst, PCStateBase &next_pc) inst->setPredTarg(next_pc); inst->setPredTaken(predict_taken); - cpu->fetchStats[tid]->numBranches++; + ++fetchStats.branches; if (predict_taken) { ++fetchStats.predictedBranches; @@ -1123,7 +1146,7 @@ Fetch::fetch(bool &status_change) fetchCacheLine(fetchAddr, tid, this_pc.instAddr()); if (fetchStatus[tid] == IcacheWaitResponse) - cpu->fetchStats[tid]->icacheStallCycles++; + ++fetchStats.icacheStallCycles; else if (fetchStatus[tid] == ItlbWait) ++fetchStats.tlbCycles; else @@ -1219,7 +1242,7 @@ Fetch::fetch(bool &status_change) staticInst = dec_ptr->decode(this_pc); // Increment stat of fetched instructions. - cpu->fetchStats[tid]->numInsts++; + ++fetchStats.insts; if (staticInst->isMacroop()) { curMacroop = staticInst; @@ -1549,7 +1572,7 @@ Fetch::profileStall(ThreadID tid) ++fetchStats.squashCycles; DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid); } else if (fetchStatus[tid] == IcacheWaitResponse) { - cpu->fetchStats[tid]->icacheStallCycles++; + ++fetchStats.icacheStallCycles; DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n", tid); } else if (fetchStatus[tid] == ItlbWait) { diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 6add31444d..cd311913f5 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -533,6 +533,12 @@ class Fetch FetchStatGroup(CPU *cpu, Fetch *fetch); // @todo: Consider making these // vectors and tracking on a per thread basis. + /** Stat for total number of cycles stalled due to an icache miss. 
*/ + statistics::Scalar icacheStallCycles; + /** Stat for total number of fetched instructions. */ + statistics::Scalar insts; + /** Total number of fetched branches. */ + statistics::Scalar branches; /** Stat for total number of predicted branches. */ statistics::Scalar predictedBranches; /** Stat for total number of cycles spent fetching. */ @@ -575,6 +581,10 @@ class Fetch statistics::Distribution nisnDist; /** Rate of how often fetch was idle. */ statistics::Formula idleRate; + /** Number of branch fetches per cycle. */ + statistics::Formula branchRate; + /** Number of instruction fetched per cycle. */ + statistics::Formula rate; } fetchStats; }; diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc index 92d281ce93..7cf6c54542 100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@ -217,14 +217,52 @@ IEW::IEWStats::IEWStats(CPU *cpu) IEW::IEWStats::ExecutedInstStats::ExecutedInstStats(CPU *cpu) : statistics::Group(cpu), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of executed instructions"), + ADD_STAT(numLoadInsts, statistics::units::Count::get(), + "Number of load instructions executed"), ADD_STAT(numSquashedInsts, statistics::units::Count::get(), "Number of squashed instructions skipped in execute"), ADD_STAT(numSwp, statistics::units::Count::get(), - "Number of swp insts executed") + "Number of swp insts executed"), + ADD_STAT(numNop, statistics::units::Count::get(), + "Number of nop insts executed"), + ADD_STAT(numRefs, statistics::units::Count::get(), + "Number of memory reference insts executed"), + ADD_STAT(numBranches, statistics::units::Count::get(), + "Number of branches executed"), + ADD_STAT(numStoreInsts, statistics::units::Count::get(), + "Number of stores executed"), + ADD_STAT(numRate, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "Inst execution rate", numInsts / cpu->baseStats.numCycles) { + numLoadInsts + .init(cpu->numThreads) + .flags(statistics::total); + numSwp 
.init(cpu->numThreads) .flags(statistics::total); + + numNop + .init(cpu->numThreads) + .flags(statistics::total); + + numRefs + .init(cpu->numThreads) + .flags(statistics::total); + + numBranches + .init(cpu->numThreads) + .flags(statistics::total); + + numStoreInsts + .flags(statistics::total); + numStoreInsts = numRefs - numLoadInsts; + + numRate + .flags(statistics::total); } void @@ -1015,7 +1053,7 @@ IEW::dispatchInsts(ThreadID tid) instQueue.recordProducer(inst); - cpu->executeStats[tid]->numNop++; + iewStats.executedInstStats.numNop[tid]++; add_to_iq = false; } else { @@ -1523,7 +1561,7 @@ IEW::updateExeInstStats(const DynInstPtr& inst) { ThreadID tid = inst->threadNumber; - cpu->executeStats[tid]->numInsts++; + iewStats.executedInstStats.numInsts++; #if TRACING_ON if (debug::O3PipeView) { @@ -1535,16 +1573,16 @@ IEW::updateExeInstStats(const DynInstPtr& inst) // Control operations // if (inst->isControl()) - cpu->executeStats[tid]->numBranches++; + iewStats.executedInstStats.numBranches[tid]++; // // Memory operations // if (inst->isMemRef()) { - cpu->executeStats[tid]->numMemRefs++; + iewStats.executedInstStats.numRefs[tid]++; if (inst->isLoad()) { - cpu->executeStats[tid]->numLoadInsts++; + iewStats.executedInstStats.numLoadInsts[tid]++; } } } diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 4fe8227dcc..80fed295df 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -455,11 +455,25 @@ class IEW { ExecutedInstStats(CPU *cpu); + /** Stat for total number of executed instructions. */ + statistics::Scalar numInsts; + /** Stat for total number of executed load instructions. */ + statistics::Vector numLoadInsts; /** Stat for total number of squashed instructions skipped at * execute. */ statistics::Scalar numSquashedInsts; /** Number of executed software prefetches. */ statistics::Vector numSwp; + /** Number of executed nops. */ + statistics::Vector numNop; + /** Number of executed meomory references. 
*/ + statistics::Vector numRefs; + /** Number of executed branches. */ + statistics::Vector numBranches; + /** Number of executed store instructions. */ + statistics::Formula numStoreInsts; + /** Number of instructions executed per cycle. */ + statistics::Formula numRate; } executedInstStats; /** Number of instructions sent to commit. */ diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 35d149097c..768f63ede5 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -154,36 +154,10 @@ BaseSimpleCPU::countInst() if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { t_info.numInst++; + t_info.execContextStats.numInsts++; } t_info.numOp++; -} - -void -BaseSimpleCPU::countFetchInst() -{ - SimpleExecContext& t_info = *threadInfo[curThread]; - - if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { - // increment thread level numInsts fetched count - fetchStats[t_info.thread->threadId()]->numInsts++; - } - // increment thread level numOps fetched count - fetchStats[t_info.thread->threadId()]->numOps++; -} - -void -BaseSimpleCPU::countCommitInst() -{ - SimpleExecContext& t_info = *threadInfo[curThread]; - - if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { - // increment thread level and core level numInsts count - commitStats[t_info.thread->threadId()]->numInsts++; - baseStats.numInsts++; - } - // increment thread level and core level numOps count - commitStats[t_info.thread->threadId()]->numOps++; - baseStats.numOps++; + t_info.execContextStats.numOps++; } Counter @@ -402,11 +376,6 @@ BaseSimpleCPU::preExecute() if (predict_taken) ++t_info.execContextStats.numPredictedBranches; } - - // increment the fetch instruction stat counters - if (curStaticInst) { - countFetchInst(); - } } void @@ -419,7 +388,7 @@ BaseSimpleCPU::postExecute() Addr instAddr = threadContexts[curThread]->pcState().instAddr(); if (curStaticInst->isMemRef()) { - executeStats[t_info.thread->threadId()]->numMemRefs++; + 
t_info.execContextStats.numMemRefs++; } if (curStaticInst->isLoad()) { @@ -427,26 +396,26 @@ BaseSimpleCPU::postExecute() } if (curStaticInst->isControl()) { - ++fetchStats[t_info.thread->threadId()]->numBranches; + ++t_info.execContextStats.numBranches; } /* Power model statistics */ //integer alu accesses if (curStaticInst->isInteger()){ - executeStats[t_info.thread->threadId()]->numIntAluAccesses++; - commitStats[t_info.thread->threadId()]->numIntInsts++; + t_info.execContextStats.numIntAluAccesses++; + t_info.execContextStats.numIntInsts++; } //float alu accesses if (curStaticInst->isFloating()){ - executeStats[t_info.thread->threadId()]->numFpAluAccesses++; - commitStats[t_info.thread->threadId()]->numFpInsts++; + t_info.execContextStats.numFpAluAccesses++; + t_info.execContextStats.numFpInsts++; } //vector alu accesses if (curStaticInst->isVector()){ - executeStats[t_info.thread->threadId()]->numVecAluAccesses++; - commitStats[t_info.thread->threadId()]->numVecInsts++; + t_info.execContextStats.numVecAluAccesses++; + t_info.execContextStats.numVecInsts++; } //Matrix alu accesses @@ -460,22 +429,22 @@ BaseSimpleCPU::postExecute() t_info.execContextStats.numCallsReturns++; } + //the number of branch predictions that will be made + if (curStaticInst->isCondCtrl()){ + t_info.execContextStats.numCondCtrlInsts++; + } + //result bus acceses if (curStaticInst->isLoad()){ - commitStats[t_info.thread->threadId()]->numLoadInsts++; + t_info.execContextStats.numLoadInsts++; } if (curStaticInst->isStore() || curStaticInst->isAtomic()){ - commitStats[t_info.thread->threadId()]->numStoreInsts++; + t_info.execContextStats.numStoreInsts++; } /* End power model statistics */ - commitStats[t_info.thread->threadId()] - ->committedInstType[curStaticInst->opClass()]++; - commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst); - - /* increment the committed numInsts and numOps stats */ - countCommitInst(); + 
t_info.execContextStats.statExecutedInstType[curStaticInst->opClass()]++; if (FullSystem) traceFunctions(instAddr); diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 46a25a0a42..df5290cf3c 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -182,8 +182,6 @@ class BaseSimpleCPU : public BaseCPU } void countInst(); - void countFetchInst(); - void countCommitInst(); Counter totalInsts() const override; Counter totalOps() const override; diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index c0927fcadd..0f20763f28 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -86,12 +86,60 @@ class SimpleExecContext : public ExecContext : statistics::Group(cpu, csprintf("exec_context.thread_%i", thread->threadId()).c_str()), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions committed"), + ADD_STAT(numOps, statistics::units::Count::get(), + "Number of ops (including micro ops) committed"), + ADD_STAT(numIntAluAccesses, statistics::units::Count::get(), + "Number of integer alu accesses"), + ADD_STAT(numFpAluAccesses, statistics::units::Count::get(), + "Number of float alu accesses"), + ADD_STAT(numVecAluAccesses, statistics::units::Count::get(), + "Number of vector alu accesses"), ADD_STAT(numMatAluAccesses, statistics::units::Count::get(), "Number of matrix alu accesses"), ADD_STAT(numCallsReturns, statistics::units::Count::get(), "Number of times a function call or return occured"), + ADD_STAT(numCondCtrlInsts, statistics::units::Count::get(), + "Number of instructions that are conditional controls"), + ADD_STAT(numIntInsts, statistics::units::Count::get(), + "Number of integer instructions"), + ADD_STAT(numFpInsts, statistics::units::Count::get(), + "Number of float instructions"), + ADD_STAT(numVecInsts, statistics::units::Count::get(), + "Number of vector instructions"), ADD_STAT(numMatInsts, statistics::units::Count::get(), "Number of matrix 
instructions"), + ADD_STAT(numIntRegReads, statistics::units::Count::get(), + "Number of times the integer registers were read"), + ADD_STAT(numIntRegWrites, statistics::units::Count::get(), + "Number of times the integer registers were written"), + ADD_STAT(numFpRegReads, statistics::units::Count::get(), + "Number of times the floating registers were read"), + ADD_STAT(numFpRegWrites, statistics::units::Count::get(), + "Number of times the floating registers were written"), + ADD_STAT(numVecRegReads, statistics::units::Count::get(), + "Number of times the vector registers were read"), + ADD_STAT(numVecRegWrites, statistics::units::Count::get(), + "Number of times the vector registers were written"), + ADD_STAT(numVecPredRegReads, statistics::units::Count::get(), + "Number of times the predicate registers were read"), + ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(), + "Number of times the predicate registers were written"), + ADD_STAT(numCCRegReads, statistics::units::Count::get(), + "Number of times the CC registers were read"), + ADD_STAT(numCCRegWrites, statistics::units::Count::get(), + "Number of times the CC registers were written"), + ADD_STAT(numMiscRegReads, statistics::units::Count::get(), + "Number of times the Misc registers were read"), + ADD_STAT(numMiscRegWrites, statistics::units::Count::get(), + "Number of times the Misc registers were written"), + ADD_STAT(numMemRefs, statistics::units::Count::get(), + "Number of memory refs"), + ADD_STAT(numLoadInsts, statistics::units::Count::get(), + "Number of load instructions"), + ADD_STAT(numStoreInsts, statistics::units::Count::get(), + "Number of store instructions"), ADD_STAT(numIdleCycles, statistics::units::Cycle::get(), "Number of idle cycles"), ADD_STAT(numBusyCycles, statistics::units::Cycle::get(), @@ -100,35 +148,64 @@ class SimpleExecContext : public ExecContext "Percentage of non-idle cycles"), ADD_STAT(idleFraction, statistics::units::Ratio::get(), "Percentage of idle cycles"), 
+ ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(), + "ICache total stall cycles"), + ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(), + "DCache total stall cycles"), + ADD_STAT(numBranches, statistics::units::Count::get(), + "Number of branches fetched"), ADD_STAT(numPredictedBranches, statistics::units::Count::get(), "Number of branches predicted as taken"), ADD_STAT(numBranchMispred, statistics::units::Count::get(), "Number of branch mispredictions"), + ADD_STAT(statExecutedInstType, statistics::units::Count::get(), + "Class of executed instruction."), numRegReads{ - &(cpu->executeStats[thread->threadId()]->numIntRegReads), - &(cpu->executeStats[thread->threadId()]->numFpRegReads), - &(cpu->executeStats[thread->threadId()]->numVecRegReads), - &(cpu->executeStats[thread->threadId()]->numVecRegReads), - &(cpu->executeStats[thread->threadId()]->numVecPredRegReads), - &(cpu->executeStats[thread->threadId()]->numCCRegReads), - &numMatRegReads + &numIntRegReads, + &numFpRegReads, + &numVecRegReads, + &numVecRegReads, + &numVecPredRegReads, + &numMatRegReads, + &numCCRegReads }, numRegWrites{ - &(cpu->executeStats[thread->threadId()]->numIntRegWrites), - &(cpu->executeStats[thread->threadId()]->numFpRegWrites), - &(cpu->executeStats[thread->threadId()]->numVecRegWrites), - &(cpu->executeStats[thread->threadId()]->numVecRegWrites), - &(cpu->executeStats[thread->threadId()] - ->numVecPredRegWrites), - &(cpu->executeStats[thread->threadId()]->numCCRegWrites), - &numMatRegWrites + &numIntRegWrites, + &numFpRegWrites, + &numVecRegWrites, + &numVecRegWrites, + &numVecPredRegWrites, + &numMatRegWrites, + &numCCRegWrites } { + numCCRegReads + .flags(statistics::nozero); + + numCCRegWrites + .flags(statistics::nozero); + + icacheStallCycles + .prereq(icacheStallCycles); + + dcacheStallCycles + .prereq(dcacheStallCycles); + + statExecutedInstType + .init(enums::Num_OpClass) + .flags(statistics::total | statistics::pdf | statistics::dist); + + for (unsigned 
i = 0; i < Num_OpClasses; ++i) { + statExecutedInstType.subname(i, enums::OpClassStrings[i]); + } idleFraction = statistics::constant(1.0) - notIdleFraction; numIdleCycles = idleFraction * cpu->baseStats.numCycles; numBusyCycles = notIdleFraction * cpu->baseStats.numCycles; + numBranches + .prereq(numBranches); + numPredictedBranches .prereq(numPredictedBranches); @@ -136,19 +213,73 @@ class SimpleExecContext : public ExecContext .prereq(numBranchMispred); } + // Number of simulated instructions + statistics::Scalar numInsts; + statistics::Scalar numOps; + + // Number of integer alu accesses + statistics::Scalar numIntAluAccesses; + + // Number of float alu accesses + statistics::Scalar numFpAluAccesses; + + // Number of vector alu accesses + statistics::Scalar numVecAluAccesses; + // Number of matrix alu accesses statistics::Scalar numMatAluAccesses; // Number of function calls/returns statistics::Scalar numCallsReturns; + // Conditional control instructions; + statistics::Scalar numCondCtrlInsts; + + // Number of int instructions + statistics::Scalar numIntInsts; + + // Number of float instructions + statistics::Scalar numFpInsts; + + // Number of vector instructions + statistics::Scalar numVecInsts; + // Number of matrix instructions statistics::Scalar numMatInsts; + // Number of integer register file accesses + statistics::Scalar numIntRegReads; + statistics::Scalar numIntRegWrites; + + // Number of float register file accesses + statistics::Scalar numFpRegReads; + statistics::Scalar numFpRegWrites; + + // Number of vector register file accesses + mutable statistics::Scalar numVecRegReads; + statistics::Scalar numVecRegWrites; + + // Number of predicate register file accesses + mutable statistics::Scalar numVecPredRegReads; + statistics::Scalar numVecPredRegWrites; + // Number of matrix register file accesses mutable statistics::Scalar numMatRegReads; statistics::Scalar numMatRegWrites; + // Number of condition code register file accesses + statistics::Scalar 
numCCRegReads; + statistics::Scalar numCCRegWrites; + + // Number of misc register file accesses + statistics::Scalar numMiscRegReads; + statistics::Scalar numMiscRegWrites; + + // Number of simulated memory references + statistics::Scalar numMemRefs; + statistics::Scalar numLoadInsts; + statistics::Scalar numStoreInsts; + // Number of idle cycles statistics::Formula numIdleCycles; @@ -159,13 +290,24 @@ class SimpleExecContext : public ExecContext statistics::Average notIdleFraction; statistics::Formula idleFraction; + // Number of cycles stalled for I-cache responses + statistics::Scalar icacheStallCycles; + + // Number of cycles stalled for D-cache responses + statistics::Scalar dcacheStallCycles; + /// @{ + /// Total number of branches fetched + statistics::Scalar numBranches; /// Number of branches predicted as taken statistics::Scalar numPredictedBranches; /// Number of misprediced branches statistics::Scalar numBranchMispred; /// @} + // Instruction mix histogram by OpClass + statistics::Vector statExecutedInstType; + std::array numRegReads; std::array numRegWrites; @@ -226,7 +368,7 @@ class SimpleExecContext : public ExecContext RegVal readMiscRegOperand(const StaticInst *si, int idx) override { - cpu->executeStats[thread->threadId()]->numMiscRegReads++; + execContextStats.numMiscRegReads++; const RegId& reg = si->srcRegIdx(idx); assert(reg.is(MiscRegClass)); return thread->readMiscReg(reg.index()); @@ -235,7 +377,7 @@ class SimpleExecContext : public ExecContext void setMiscRegOperand(const StaticInst *si, int idx, RegVal val) override { - cpu->executeStats[thread->threadId()]->numMiscRegWrites++; + execContextStats.numMiscRegWrites++; const RegId& reg = si->destRegIdx(idx); assert(reg.is(MiscRegClass)); thread->setMiscReg(reg.index(), val); @@ -248,7 +390,7 @@ class SimpleExecContext : public ExecContext RegVal readMiscReg(int misc_reg) override { - cpu->executeStats[thread->threadId()]->numMiscRegReads++; + execContextStats.numMiscRegReads++; return 
thread->readMiscReg(misc_reg); } @@ -259,7 +401,7 @@ class SimpleExecContext : public ExecContext void setMiscReg(int misc_reg, RegVal val) override { - cpu->executeStats[thread->threadId()]->numMiscRegWrites++; + execContextStats.numMiscRegWrites++; thread->setMiscReg(misc_reg, val); } From 4c8ad56072d66974485fbe94e92ef1a1a890c291 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 21 Feb 2023 05:08:34 +0000 Subject: [PATCH 249/492] fastmodel: Check early for license server issue We have a setup that requires manual startup of an ssh proxy to access license server, and without that, gem5 takes about a minute until the license checkout times out (until then, it's unclear why nothing is happening). We asked ARM for a way to decrease timeouts, but that doesn't seem to be easy to do. Change-Id: I37b84fd52cb7fb221a9e48dcb52a33a11f4d1580 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68177 Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- src/arch/arm/fastmodel/arm_fast_model.py | 70 ++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 5 deletions(-) diff --git a/src/arch/arm/fastmodel/arm_fast_model.py b/src/arch/arm/fastmodel/arm_fast_model.py index d2d911f5b4..81b2cfe04b 100644 --- a/src/arch/arm/fastmodel/arm_fast_model.py +++ b/src/arch/arm/fastmodel/arm_fast_model.py @@ -23,21 +23,75 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import logging import os +import socket from m5.defines import buildEnv import _m5.arm_fast_model +ARM_LICENSE_ENV = "ARMLMD_LICENSE_FILE" +LM_LICENSE_ENV = "LM_LICENSE_FILE" + def set_armlmd_license_file(force=False): """Set the ARMLMD_LICENSE_FILE environment variable. If "force" is False, then it will only be set if it wasn't already set in the environment. The value it's set to is the one gem5 was built with. 
""" - key = "ARMLMD_LICENSE_FILE" - license_file = buildEnv[key] - if force or key not in os.environ: - os.environ[key] = license_file + license_file = buildEnv[ARM_LICENSE_ENV] + if force or ARM_LICENSE_ENV not in os.environ: + os.environ[ARM_LICENSE_ENV] = license_file + + +def check_armlmd_license(timeout): + """Check if any of the provided license server can be reached, or + if a license file is provided. This allows to fail early and fast, + as fastmodel code makes multiple lengthy attempts to connect to + license server. "timeout" is in seconds. + """ + servers = os.environ[ARM_LICENSE_ENV].split(":") + + extras = list() + # Add LM_LICENSE_ENV to the list, if set. + if LM_LICENSE_ENV in os.environ and os.environ[LM_LICENSE_ENV]: + extras += os.environ[LM_LICENSE_ENV].split(":") + # Fastmodel appears to always add this file. + extras.append("/opt/arm/licenses/license.dat") + for extra in extras: + if extra not in servers: + servers.append(extra) + + for server in servers: + if os.path.exists(server): + logging.debug("License file %s exists." % server) + break + + tuple = server.split("@") + if len(tuple) != 2: + # Probably not a server, and we know the file doesn't exist. + logging.debug('License file "%s" does not exist.' % server) + continue + + try: + # Try to connect to license server. This doesn't attempt to + # communicate with it, just checking reachability. + s = socket.create_connection( + (tuple[1], int(tuple[0])), timeout=timeout + ) + s.close() + logging.debug("License server %s is reachable." % server) + break + except Exception as e: + logging.debug( + "Cannot connect to license server %s (%s: %s)." + % (server, type(e).__name__, e) + ) + else: + raise ConnectionError( + "Cannot connect to any of the license servers (%s)." 
+ % ", ".join(servers) + ) # These methods wrap much of the SystemC Export API described in section @@ -142,9 +196,15 @@ def scx_get_min_sync_latency(arg=None): # This should be called once per simulation def setup_simulation( - sim_name, min_sync_latency=100.0 / 100000000, exit_on_dmi_warning=False + sim_name, + min_sync_latency=100.0 / 100000000, + exit_on_dmi_warning=False, + license_precheck=False, + license_precheck_timeout=1, ): set_armlmd_license_file() + if license_precheck: + check_armlmd_license(license_precheck_timeout) scx_initialize(sim_name) scx_set_min_sync_latency(min_sync_latency) if exit_on_dmi_warning: From cdab011373f74ec8a8810af09c5b2ee52a83242c Mon Sep 17 00:00:00 2001 From: paikunal Date: Tue, 7 Mar 2023 10:22:48 -0800 Subject: [PATCH 250/492] configs: Adds an example script for POWER Hello Used the "power-hello" resource to make an stdlib example script for that resource Change-Id: Ia8a051330e263617aa0e2ef08321d01cfa1093c4 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68737 Reviewed-by: Boris Shingarov Maintainer: Boris Shingarov Tested-by: kokoro --- configs/example/gem5_library/power-hello.py | 89 +++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 configs/example/gem5_library/power-hello.py diff --git a/configs/example/gem5_library/power-hello.py b/configs/example/gem5_library/power-hello.py new file mode 100644 index 0000000000..cf31778945 --- /dev/null +++ b/configs/example/gem5_library/power-hello.py @@ -0,0 +1,89 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This gem5 configuration script creates a simple board to run a POWER +"hello world" binary. + +This setup is close to the simplest setup possible using the gem5 +library. It does not contain any kind of caching, IO, or any non-essential +components. 
+ +Usage +----- + +``` +scons build/POWER/gem5.opt +./build/POWER/gem5.opt configs/example/gem5_library/power-hello.py +``` +""" + +from gem5.isas import ISA +from gem5.utils.requires import requires +from gem5.resources.resource import Resource +from gem5.components.memory import SingleChannelDDR4_2400 +from gem5.components.processors.cpu_types import CPUTypes +from gem5.components.boards.simple_board import SimpleBoard +from gem5.components.cachehierarchies.classic.no_cache import NoCache +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.simulate.simulator import Simulator + +# This check ensures the gem5 binary is compiled to the POWER ISA target. +# If not, an exception will be thrown. +requires(isa_required=ISA.POWER) + +# In this setup we don't have a cache. `NoCache` can be used for such setups. +cache_hierarchy = NoCache() + +# We use a single channel DDR4_2400 memory system +memory = SingleChannelDDR4_2400(size="32MB") + +# We use a simple ATOMIC processor with one core. +processor = SimpleProcessor( + cpu_type=CPUTypes.ATOMIC, isa=ISA.POWER, num_cores=1 +) + +# The gem5 library simple board which can be used to run simple SE-mode +# simulations. +board = SimpleBoard( + clk_freq="3GHz", + processor=processor, + memory=memory, + cache_hierarchy=cache_hierarchy, +) + +board.set_se_binary_workload(Resource("power-hello")) + +# Lastly we run the simulation. +simulator = Simulator(board=board) +simulator.run() + +print( + "Exiting @ tick {} because {}.".format( + simulator.get_current_tick(), + simulator.get_last_exit_event_cause(), + ) +) From 54e06f88c25a2284e4cd053a27bb868b1260fe19 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Wed, 8 Mar 2023 09:19:14 +0000 Subject: [PATCH 251/492] tests: Fix import path in simple_binary_run.py We should be using gem5.components instead of python.gem5.components. 
In https://gem5-review.git.corp.google.com/c/public/gem5/+/68518 I was seeing the RISC-V tests fail with `ModuleNotFoundError: No module named 'python.gem5.components.processors.base_cpu_core'`. This fixes the issue for me with the RISC-V tests. I also searched for other similar imports and I've removed a similar (unused) one in x86_boot_exit_run.py. Change-Id: I61a0c4c27724854956f778f14e1fcfafea927ffd Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68757 Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce Tested-by: kokoro --- tests/gem5/configs/simple_binary_run.py | 3 +-- tests/gem5/configs/x86_boot_exit_run.py | 3 --- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/gem5/configs/simple_binary_run.py b/tests/gem5/configs/simple_binary_run.py index fbb0313f93..5540e806ba 100644 --- a/tests/gem5/configs/simple_binary_run.py +++ b/tests/gem5/configs/simple_binary_run.py @@ -40,6 +40,7 @@ from gem5.components.memory import SingleChannelDDR3_1600 from gem5.components.boards.simple_board import SimpleBoard from gem5.components.cachehierarchies.classic.no_cache import NoCache from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.base_cpu_core import BaseCPUCore from gem5.components.processors.base_cpu_processor import BaseCPUProcessor from gem5.components.processors.simple_core import SimpleCore from gem5.components.boards.mem_mode import MemMode @@ -52,8 +53,6 @@ from m5.util import fatal import argparse import importlib -from python.gem5.components.processors.base_cpu_core import BaseCPUCore - cpu_types_string_map = { CPUTypes.ATOMIC: "AtomicSimpleCPU", CPUTypes.O3: "O3CPU", diff --git a/tests/gem5/configs/x86_boot_exit_run.py b/tests/gem5/configs/x86_boot_exit_run.py index b1cbc647b2..5458b6db6c 100644 --- a/tests/gem5/configs/x86_boot_exit_run.py +++ b/tests/gem5/configs/x86_boot_exit_run.py @@ -46,9 +46,6 @@ from gem5.resources.workload import Workload import argparse import 
importlib -from python.gem5.components.boards.kernel_disk_workload import ( - KernelDiskWorkload, -) parser = argparse.ArgumentParser( description="A script to run the gem5 boot test. This test boots the " From 79d407280ca0d7553b1f29a9533f64664b8ecb65 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 27 Feb 2023 16:58:44 +0000 Subject: [PATCH 252/492] arch-riscv: Fix invalid std::map access The CSRData map uses a RISC-V CSR number as the key rather than one of the MISCREG_* enumerators. Use MiscRegNames[] instead to stringify the argument for the debug message. Change-Id: I2533bc29d148d3b34c01022eeaeedf64c39a99b9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68759 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/arch/riscv/isa.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index d778957b9e..7964de51ec 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -510,7 +510,7 @@ ISA::setMiscReg(RegIndex idx, RegVal val) { if (idx >= MISCREG_CYCLE && idx <= MISCREG_HPMCOUNTER31) { // Ignore writes to HPM counters for now - warn("Ignoring write to %s.\n", CSRData.at(idx).name); + warn("Ignoring write to miscreg %s.\n", MiscRegNames[idx]); } else { switch (idx) { From 6841e1aa5a1738961940fece2b35baf77c8c224d Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Fri, 10 Mar 2023 13:53:08 -0800 Subject: [PATCH 253/492] stdlib: Fix bug in MESI_Three_Level_Cache initialization Change-Id: I2d06c842955aa1868053a0d852fc523392480154 Signed-off-by: Hoa Nguyen Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68857 Tested-by: kokoro Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power --- .../cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py | 4 ++-- .../cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py index 9f47e411f8..b4854816fb 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py @@ -68,14 +68,14 @@ class L1Cache(L0Cache_Controller): self.Icache = RubyCache( size=l1i_size, assoc=l1i_assoc, - start_index_bit=self.getBlockSizeBits(cache_line_size.value), + start_index_bit=self.getBlockSizeBits(cache_line_size), is_icache=True, replacement_policy=LRURP(), ) self.Dcache = RubyCache( size=l1d_size, assoc=l1d_assoc, - start_index_bit=self.getBlockSizeBits(cache_line_size.value), + start_index_bit=self.getBlockSizeBits(cache_line_size), is_icache=False, replacement_policy=LRURP(), ) diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py index d8c965924e..d54e1ab8dc 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py @@ -67,7 +67,7 @@ class L2Cache(L1Cache_Controller): self.cache = RubyCache( size=l2_size, assoc=l2_assoc, - start_index_bit=self.getBlockSizeBits(cache_line_size.value), + start_index_bit=self.getBlockSizeBits(cache_line_size), is_icache=False, ) # l2_select_num_bits is ruby backend terminology. From 3fe129e8ea375f45879de414c02444c68659ad1d Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Fri, 10 Mar 2023 23:52:57 -0800 Subject: [PATCH 254/492] stdlib: use atomic_noncaching when using AtomicSimpleCPU with Ruby mem_mode is supposed to be atomic_noncaching when running AtomicSimpleCPU with Ruby cache. 
Change-Id: Icb419f9370038f5c1f80dd879b187338279a5b93 Signed-off-by: Hoa Nguyen Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68877 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- .../gem5/components/processors/base_cpu_processor.py | 1 + .../processors/simple_switchable_processor.py | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/src/python/gem5/components/processors/base_cpu_processor.py b/src/python/gem5/components/processors/base_cpu_processor.py index d311a0fdc0..9a7561587a 100644 --- a/src/python/gem5/components/processors/base_cpu_processor.py +++ b/src/python/gem5/components/processors/base_cpu_processor.py @@ -97,6 +97,7 @@ class BaseCPUProcessor(AbstractProcessor): "'atomic_noncaching' memory mode. This will skip caching " "completely." ) + board.set_mem_mode(MemMode.ATOMIC_NONCACHING) else: board.set_mem_mode(MemMode.ATOMIC) else: diff --git a/src/python/gem5/components/processors/simple_switchable_processor.py b/src/python/gem5/components/processors/simple_switchable_processor.py index 56603fa98b..e3978412c3 100644 --- a/src/python/gem5/components/processors/simple_switchable_processor.py +++ b/src/python/gem5/components/processors/simple_switchable_processor.py @@ -103,6 +103,16 @@ class SimpleSwitchableProcessor(SwitchableProcessor): def incorporate_processor(self, board: AbstractBoard) -> None: super().incorporate_processor(board=board) + if ( + board.get_cache_hierarchy().is_ruby() + and self._mem_mode == MemMode.ATOMIC + ): + warn( + "Using an atomic core with Ruby will result in " + "'atomic_noncaching' memory mode. This will skip caching " + "completely." 
+ ) + self._mem_mode = MemMode.ATOMIC_NONCACHING board.set_mem_mode(self._mem_mode) def switch(self): From 9fe9b2853c27c846206a2c6b48f186cb8ed5097d Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Mon, 20 Jun 2022 16:52:15 +0200 Subject: [PATCH 255/492] base: Create a gem5 type_traits.hh header That header currently contains type traits that derive the class, the return type and the arguments of a member function from a pointer to that member function. Change-Id: I41dd41056f507016219d6111d25c8cb4c2ad3439 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67652 Reviewed-by: Daniel Carvalho Maintainer: Daniel Carvalho Tested-by: kokoro --- src/base/type_traits.hh | 97 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 src/base/type_traits.hh diff --git a/src/base/type_traits.hh b/src/base/type_traits.hh new file mode 100644 index 0000000000..1fec93d9d1 --- /dev/null +++ b/src/base/type_traits.hh @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2022 Arteris, Inc. and its applicable licensors and + * affiliates. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __BASE_TYPETRAITS_HH__ +#define __BASE_TYPETRAITS_HH__ + +#include <tuple> +#include <type_traits> + +namespace gem5 +{ + +/* + * Type traits that enable inspecting the signature of a member function based + * on a pointer to that function. Specifically, these type traits provide a + * class_t, a return_t and an argsTuple_t alias that correspond respectively to + * the class that the function is a member of, the return type of the member + * function and the list of parameter types packed in a tuple. + * Convenience template aliases are also provided. + * + * Example, assuming "struct Struct {void foo(int, bool);};": + * - MemberFunctionClass_t<&Struct::foo> is Struct. + * - MemberFunctionReturn_t<&Struct::foo> is void. + * - MemberFunctionArgsTuple_t<&Struct::foo> is std::tuple<int, bool>.
+ */ + +template<typename F> +struct MemberFunctionSignature; +template<typename C, typename R, class... A> +struct MemberFunctionSignature<R(C::*)(A...)> +{ + using class_t = C; + using return_t = R; + using argsTuple_t = std::tuple<A...>; +}; +template<typename C, typename R, class... A> +struct MemberFunctionSignature<R(C::*)(A...) const> +{ + using class_t = std::add_const_t<C>; + using return_t = R; + using argsTuple_t = std::tuple<A...>; +}; +template<typename C, typename R, class... A> +struct MemberFunctionSignature<R(C::*)(A...) volatile> +{ + using class_t = std::add_volatile_t<C>; + using return_t = R; + using argsTuple_t = std::tuple<A...>; +}; +template<typename C, typename R, class... A> +struct MemberFunctionSignature<R(C::*)(A...) const volatile> +{ + using class_t = std::add_cv_t<C>; + using return_t = R; + using argsTuple_t = std::tuple<A...>; +}; +template<auto F> +using MemberFunctionClass_t = + typename MemberFunctionSignature<decltype(F)>::class_t; + +template<auto F> +using MemberFunctionReturn_t = + typename MemberFunctionSignature<decltype(F)>::return_t; + +template<auto F> +using MemberFunctionArgsTuple_t = + typename MemberFunctionSignature<decltype(F)>::argsTuple_t; + +} // namespace gem5 + +#endif // __BASE_TYPETRAITS_HH__ From 4ec432caa435e69946fcf6735838027e6cd5789a Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Thu, 2 Feb 2023 16:09:57 +0000 Subject: [PATCH 256/492] sim: Define a new MemberEventWrapper event class This new event class simplifies the use of EventWrapper and aims at superseding it. EventWrapper has been redefined in terms of MemberEventWrapper. MemberEventWrapper makes use of the new type traits to simplify template parameterization and encourage its use over SimpleEvent that often wraps a lambda that merely calls a member function.
Change-Id: Ie59e4c51705b9c2b2faa27097678d7d85f5b99c6 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67653 Reviewed-by: Jason Lowe-Power Tested-by: kokoro Maintainer: Daniel Carvalho Reviewed-by: Daniel Carvalho --- src/sim/eventq.hh | 74 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 23 deletions(-) diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh index 62495bf86d..b46a25bf38 100644 --- a/src/sim/eventq.hh +++ b/src/sim/eventq.hh @@ -46,7 +46,9 @@ #include "base/debug.hh" #include "base/flags.hh" +#include "base/named.hh" #include "base/trace.hh" +#include "base/type_traits.hh" #include "base/types.hh" #include "base/uncontended_mutex.hh" #include "debug/Event.hh" @@ -1071,38 +1073,64 @@ class EventManager void setCurTick(Tick newVal) { eventq->setCurTick(newVal); } }; -template <class T, void (T::* F)()> -class EventWrapper : public Event +/** + * @brief Wrap a member function inside MemberEventWrapper to use it as an + * event callback. This wrapper should be preferred over EventFunctionWrapper + * for better performance and type safety. + * + * Wrapping a function *process* member of a class *klass* can be done by + * adding a member variable of the following type: + * MemberEventWrapper<&klass::process>. + * + * It is required that klass::process takes no explicit argument and returns no + * value as these could not be handled by the event scheduler. + * + * @tparam F Pointer to the member function wrapped in this event.
+ */ +template <auto F> +class MemberEventWrapper final: public Event, public Named { - private: - T *object; + using CLASS = MemberFunctionClass_t<F>; + static_assert(std::is_same_v<void, MemberFunctionReturn_t<F>>); + static_assert(std::is_same_v<MemberFunctionArgsTuple_t<F>, std::tuple<>>); - public: - EventWrapper(T *obj, bool del = false, Priority p = Default_Pri) - : Event(p), object(obj) +public: + MemberEventWrapper(CLASS *object, + bool del = false, + Priority p = Default_Pri): + Event(p), + Named(object->name() + ".wrapped_event"), + mObject(object) { - if (del) - setFlags(AutoDelete); + gem5_assert(mObject); + if (del) setFlags(AutoDelete); } - EventWrapper(T &obj, bool del = false, Priority p = Default_Pri) - : Event(p), object(&obj) - { - if (del) - setFlags(AutoDelete); + /** + * @brief Construct a new MemberEventWrapper object + * + * @param object instance of the object to call the wrapped member func on + * @param del if true, flag this event as AutoDelete + * @param p priority of this event + */ + MemberEventWrapper(CLASS &object, + bool del = false, + Priority p = Default_Pri): + MemberEventWrapper(&object, del, p) + {} + + void process() override { + (mObject->*F)(); } - void process() { (object->*F)(); } - - const std::string - name() const - { - return object->name() + ".wrapped_event"; - } - - const char *description() const { return "EventWrapped"; } + const char *description() const override { return "EventWrapped"; } +private: + CLASS *mObject; }; +template <class T, void (T::* F)()> +using EventWrapper = MemberEventWrapper<F>; + class EventFunctionWrapper : public Event { private: From 1bb8cd3d44c563877d486953f0534c4dc9daa9e1 Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Thu, 2 Feb 2023 16:16:24 +0000 Subject: [PATCH 257/492] sim: Switch from EventWrapper to MemberEventWrapper before deprec Change-Id: I25c81787d522a0dd063112b6727669da46e0f0e7 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67655 Reviewed-by: Daniel Carvalho Tested-by: kokoro Maintainer: Jason Lowe-Power --- src/arch/sparc/isa.hh | 6 +++---
src/base/remote_gdb.hh | 6 +++--- src/dev/arm/smmu_v3.hh | 2 +- src/dev/arm/smmu_v3_deviceifc.hh | 4 +--- src/dev/arm/smmu_v3_proc.cc | 3 +-- src/mem/qos/mem_sink.hh | 4 +--- src/sim/power_domain.hh | 2 +- src/systemc/core/kernel.hh | 2 +- src/systemc/core/scheduler.hh | 12 ++++++------ .../systemc_simple_object/feeder.hh | 2 +- 10 files changed, 19 insertions(+), 24 deletions(-) diff --git a/src/arch/sparc/isa.hh b/src/arch/sparc/isa.hh index 22bfba4f06..dee71d6e0e 100644 --- a/src/arch/sparc/isa.hh +++ b/src/arch/sparc/isa.hh @@ -133,13 +133,13 @@ class ISA : public BaseISA void processSTickCompare(); void processHSTickCompare(); - typedef EventWrapper TickCompareEvent; + typedef MemberEventWrapper<&ISA::processTickCompare> TickCompareEvent; TickCompareEvent *tickCompare = nullptr; - typedef EventWrapper STickCompareEvent; + typedef MemberEventWrapper<&ISA::processSTickCompare> STickCompareEvent; STickCompareEvent *sTickCompare = nullptr; - typedef EventWrapper HSTickCompareEvent; + typedef MemberEventWrapper<&ISA::processHSTickCompare> HSTickCompareEvent; HSTickCompareEvent *hSTickCompare = nullptr; static const int NumGlobalRegs = 8; diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh index 1c5cd9c7af..7981a13064 100644 --- a/src/base/remote_gdb.hh +++ b/src/base/remote_gdb.hh @@ -274,8 +274,8 @@ class BaseRemoteGDB BaseGdbRegCache *regCachePtr = nullptr; - EventWrapper connectEvent; - EventWrapper disconnectEvent; + MemberEventWrapper<&BaseRemoteGDB::connect> connectEvent; + MemberEventWrapper<&BaseRemoteGDB::detach> disconnectEvent; class TrapEvent : public Event { @@ -308,7 +308,7 @@ class BaseRemoteGDB // Single step. 
void singleStep(); - EventWrapper singleStepEvent; + MemberEventWrapper<&BaseRemoteGDB::singleStep> singleStepEvent; void clearSingleStep(); void setSingleStep(); diff --git a/src/dev/arm/smmu_v3.hh b/src/dev/arm/smmu_v3.hh index 25b91ff620..8721352c47 100644 --- a/src/dev/arm/smmu_v3.hh +++ b/src/dev/arm/smmu_v3.hh @@ -167,7 +167,7 @@ class SMMUv3 : public ClockedObject SMMUAction runProcessTiming(SMMUProcess *proc, PacketPtr pkt); void processCommands(); - EventWrapper processCommandsEvent; + MemberEventWrapper<&SMMUv3::processCommands> processCommandsEvent; void processCommand(const SMMUCommand &cmd); diff --git a/src/dev/arm/smmu_v3_deviceifc.hh b/src/dev/arm/smmu_v3_deviceifc.hh index c4ffa379f6..3152f8a698 100644 --- a/src/dev/arm/smmu_v3_deviceifc.hh +++ b/src/dev/arm/smmu_v3_deviceifc.hh @@ -114,9 +114,7 @@ class SMMUv3DeviceInterface : public ClockedObject bool atsDeviceNeedsRetry; SMMUDeviceRetryEvent sendDeviceRetryEvent; - EventWrapper< - SMMUv3DeviceInterface, - &SMMUv3DeviceInterface::atsSendDeviceRetry> atsSendDeviceRetryEvent; + MemberEventWrapper<&SMMUv3DeviceInterface::atsSendDeviceRetry> atsSendDeviceRetryEvent; Port& getPort(const std::string &name, PortID id) override; diff --git a/src/dev/arm/smmu_v3_proc.cc b/src/dev/arm/smmu_v3_proc.cc index 2cf2cf9e98..ef3db5dd4d 100644 --- a/src/dev/arm/smmu_v3_proc.cc +++ b/src/dev/arm/smmu_v3_proc.cc @@ -199,8 +199,7 @@ SMMUProcess::doBroadcastSignal(SMMUSignal &sig) void SMMUProcess::scheduleWakeup(Tick when) { - auto *ep = new EventWrapper< - SMMUProcess, &SMMUProcess::wakeup> (this, true); + auto *ep = new MemberEventWrapper<&SMMUProcess::wakeup> (this, true); smmu.schedule(ep, when); } diff --git a/src/mem/qos/mem_sink.hh b/src/mem/qos/mem_sink.hh index d2310c65fe..bd42a9a2ed 100644 --- a/src/mem/qos/mem_sink.hh +++ b/src/mem/qos/mem_sink.hh @@ -222,9 +222,7 @@ class MemSinkCtrl : public MemCtrl void processNextReqEvent(); /** Event wrapper to schedule next request handler function */ - EventWrapper< 
- MemSinkCtrl, - &MemSinkCtrl::processNextReqEvent> nextReqEvent; + MemberEventWrapper<&MemSinkCtrl::processNextReqEvent> nextReqEvent; /** * Check if the read queue has room for more entries diff --git a/src/sim/power_domain.hh b/src/sim/power_domain.hh index 96233e436b..1264d8f1ba 100644 --- a/src/sim/power_domain.hh +++ b/src/sim/power_domain.hh @@ -151,7 +151,7 @@ class PowerDomain : public PowerState /** * Event to update the power states of the followers */ - EventWrapper + MemberEventWrapper<&PowerDomain::setFollowerPowerStates> pwrStateUpdateEvent; protected: diff --git a/src/systemc/core/kernel.hh b/src/systemc/core/kernel.hh index 9dba9030be..ec47569279 100644 --- a/src/systemc/core/kernel.hh +++ b/src/systemc/core/kernel.hh @@ -65,7 +65,7 @@ class Kernel : public gem5::SimObject private: static void stopWork(); - gem5::EventWrapper t0Event; + gem5::MemberEventWrapper<&Kernel::t0Handler> t0Event; }; extern Kernel *kernel; diff --git a/src/systemc/core/scheduler.hh b/src/systemc/core/scheduler.hh index 6eabb5606c..49ad6c6f0d 100644 --- a/src/systemc/core/scheduler.hh +++ b/src/systemc/core/scheduler.hh @@ -465,13 +465,13 @@ class Scheduler } void runReady(); - gem5::EventWrapper readyEvent; + gem5::MemberEventWrapper<&Scheduler::runReady> readyEvent; void scheduleReadyEvent(); void pause(); void stop(); - gem5::EventWrapper pauseEvent; - gem5::EventWrapper stopEvent; + gem5::MemberEventWrapper<&Scheduler::pause> pauseEvent; + gem5::MemberEventWrapper<&Scheduler::stop> stopEvent; const ::sc_core::sc_report *_throwUp; @@ -484,7 +484,7 @@ class Scheduler timeSlots.front()->targeted_when > maxTick) && initList.empty()); } - gem5::EventWrapper starvationEvent; + gem5::MemberEventWrapper<&Scheduler::pause> starvationEvent; void scheduleStarvationEvent(); bool _elaborationDone; @@ -502,10 +502,10 @@ class Scheduler _changeStamp++; pause(); } - gem5::EventWrapper maxTickEvent; + gem5::MemberEventWrapper<&Scheduler::maxTickFunc> maxTickEvent; void timeAdvances() { 
trace(false); } - gem5::EventWrapper timeAdvancesEvent; + gem5::MemberEventWrapper<&Scheduler::timeAdvances> timeAdvancesEvent; void scheduleTimeAdvancesEvent() { diff --git a/util/systemc/systemc_within_gem5/systemc_simple_object/feeder.hh b/util/systemc/systemc_within_gem5/systemc_simple_object/feeder.hh index c843c83e2b..865362cf8f 100644 --- a/util/systemc/systemc_within_gem5/systemc_simple_object/feeder.hh +++ b/util/systemc/systemc_within_gem5/systemc_simple_object/feeder.hh @@ -64,7 +64,7 @@ class Feeder : public gem5::SimObject // except to help interact with systemc objects/models. sc_core::sc_buffer buf; - gem5::EventWrapper event; + gem5::MemberEventWrapper<&Feeder::feed> event; void startup() override; }; From 7813e294ff9fc83511f0d6f1d8fb433c1b56db7b Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Thu, 2 Feb 2023 16:14:04 +0000 Subject: [PATCH 258/492] sim: Deprecate EventWrapper in favour of MemberEventWrapper Change-Id: I87363fb36cd998e7f0afeb25381e5b230a15b493 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67654 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Reviewed-by: Daniel Carvalho Tested-by: kokoro --- src/sim/eventq.hh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh index b46a25bf38..ff9d4bfa08 100644 --- a/src/sim/eventq.hh +++ b/src/sim/eventq.hh @@ -1129,7 +1129,8 @@ private: }; template -using EventWrapper = MemberEventWrapper; +using EventWrapper [[deprecated("Use MemberEventWrapper instead")]] + = MemberEventWrapper; class EventFunctionWrapper : public Event { From ba19f967d7529542f790bcd15a2746e399591fdf Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Thu, 2 Feb 2023 16:54:33 +0000 Subject: [PATCH 259/492] sim: Use ref constructor of MemberEventWrapper everywhere Change-Id: I77989aa7318142634c771c558293138e7b1e8e51 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67657 Maintainer: Bobby Bruce Tested-by: kokoro Reviewed-by: 
Daniel Carvalho --- src/arch/sparc/isa.cc | 6 +++--- src/arch/sparc/ua2005.cc | 6 +++--- src/base/remote_gdb.cc | 2 +- src/dev/arm/smmu_v3.cc | 2 +- src/dev/arm/smmu_v3_deviceifc.cc | 2 +- src/dev/arm/smmu_v3_proc.cc | 2 +- src/mem/qos/mem_sink.cc | 2 +- src/systemc/core/kernel.cc | 2 +- src/systemc/core/scheduler.cc | 12 ++++++------ 9 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/arch/sparc/isa.cc b/src/arch/sparc/isa.cc index 38b3d1c3e2..e7807c2b0a 100644 --- a/src/arch/sparc/isa.cc +++ b/src/arch/sparc/isa.cc @@ -953,15 +953,15 @@ ISA::unserialize(CheckpointIn &cp) UNSERIALIZE_SCALAR(hstick_cmp); if (tick_cmp) { - tickCompare = new TickCompareEvent(this); + tickCompare = new TickCompareEvent(*this); schedule(tickCompare, tick_cmp); } if (stick_cmp) { - sTickCompare = new STickCompareEvent(this); + sTickCompare = new STickCompareEvent(*this); schedule(sTickCompare, stick_cmp); } if (hstick_cmp) { - hSTickCompare = new HSTickCompareEvent(this); + hSTickCompare = new HSTickCompareEvent(*this); schedule(hSTickCompare, hstick_cmp); } } diff --git a/src/arch/sparc/ua2005.cc b/src/arch/sparc/ua2005.cc index 3f5372af53..45cc9d75c4 100644 --- a/src/arch/sparc/ua2005.cc +++ b/src/arch/sparc/ua2005.cc @@ -107,7 +107,7 @@ ISA::setFSReg(int miscReg, RegVal val) case MISCREG_TICK_CMPR: if (tickCompare == NULL) - tickCompare = new TickCompareEvent(this); + tickCompare = new TickCompareEvent(*this); setMiscRegNoEffect(miscReg, val); if ((tick_cmpr & ~mask(63)) && tickCompare->scheduled()) cpu->deschedule(tickCompare); @@ -122,7 +122,7 @@ ISA::setFSReg(int miscReg, RegVal val) case MISCREG_STICK_CMPR: if (sTickCompare == NULL) - sTickCompare = new STickCompareEvent(this); + sTickCompare = new STickCompareEvent(*this); setMiscRegNoEffect(miscReg, val); if ((stick_cmpr & ~mask(63)) && sTickCompare->scheduled()) cpu->deschedule(sTickCompare); @@ -193,7 +193,7 @@ ISA::setFSReg(int miscReg, RegVal val) case MISCREG_HSTICK_CMPR: if (hSTickCompare == NULL) - 
hSTickCompare = new HSTickCompareEvent(this); + hSTickCompare = new HSTickCompareEvent(*this); setMiscRegNoEffect(miscReg, val); if ((hstick_cmpr & ~mask(63)) && hSTickCompare->scheduled()) cpu->deschedule(hSTickCompare); diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc index 43f53d1247..dd37a3503a 100644 --- a/src/base/remote_gdb.cc +++ b/src/base/remote_gdb.cc @@ -393,7 +393,7 @@ std::map hardBreakMap; BaseRemoteGDB::BaseRemoteGDB(System *_system, int _port) : incomingConnectionEvent(nullptr), incomingDataEvent(nullptr), _port(_port), fd(-1), sys(_system), - connectEvent(this), disconnectEvent(this), trapEvent(this), + connectEvent(*this), disconnectEvent(*this), trapEvent(this), singleStepEvent(*this) {} diff --git a/src/dev/arm/smmu_v3.cc b/src/dev/arm/smmu_v3.cc index 41f7424e77..8ce8bd92b2 100644 --- a/src/dev/arm/smmu_v3.cc +++ b/src/dev/arm/smmu_v3.cc @@ -100,7 +100,7 @@ SMMUv3::SMMUv3(const SMMUv3Params &params) : deviceInterfaces(params.device_interfaces), commandExecutor(name() + ".cmd_exec", *this), regsMap(params.reg_map), - processCommandsEvent(this) + processCommandsEvent(*this) { fatal_if(regsMap.size() != SMMU_REG_SIZE, "Invalid register map size: %#x different than SMMU_REG_SIZE = %#x\n", diff --git a/src/dev/arm/smmu_v3_deviceifc.cc b/src/dev/arm/smmu_v3_deviceifc.cc index 166b85d727..0966150541 100644 --- a/src/dev/arm/smmu_v3_deviceifc.cc +++ b/src/dev/arm/smmu_v3_deviceifc.cc @@ -78,7 +78,7 @@ SMMUv3DeviceInterface::SMMUv3DeviceInterface( deviceNeedsRetry(false), atsDeviceNeedsRetry(false), sendDeviceRetryEvent(*this), - atsSendDeviceRetryEvent(this) + atsSendDeviceRetryEvent(*this) {} void diff --git a/src/dev/arm/smmu_v3_proc.cc b/src/dev/arm/smmu_v3_proc.cc index ef3db5dd4d..f0c2633cf5 100644 --- a/src/dev/arm/smmu_v3_proc.cc +++ b/src/dev/arm/smmu_v3_proc.cc @@ -199,7 +199,7 @@ SMMUProcess::doBroadcastSignal(SMMUSignal &sig) void SMMUProcess::scheduleWakeup(Tick when) { - auto *ep = new MemberEventWrapper<&SMMUProcess::wakeup>
(this, true); + auto *ep = new MemberEventWrapper<&SMMUProcess::wakeup> (*this, true); smmu.schedule(ep, when); } diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc index b6b77ca9df..66b945153b 100644 --- a/src/mem/qos/mem_sink.cc +++ b/src/mem/qos/mem_sink.cc @@ -60,7 +60,7 @@ MemSinkCtrl::MemSinkCtrl(const QoSMemSinkCtrlParams &p) readBufferSize(p.read_buffer_size), writeBufferSize(p.write_buffer_size), port(name() + ".port", *this), interface(p.interface), - retryRdReq(false), retryWrReq(false), nextRequest(0), nextReqEvent(this), + retryRdReq(false), retryWrReq(false), nextRequest(0), nextReqEvent(*this), stats(this) { // Resize read and write queue to allocate space diff --git a/src/systemc/core/kernel.cc b/src/systemc/core/kernel.cc index ae67e4676c..f6c96e5b2d 100644 --- a/src/systemc/core/kernel.cc +++ b/src/systemc/core/kernel.cc @@ -56,7 +56,7 @@ void Kernel::status(sc_core::sc_status s) { _status = s; } Kernel::Kernel(const Params &params, int) : gem5::SimObject(params), - t0Event(this, false, gem5::EventBase::Default_Pri - 1) + t0Event(*this, false, gem5::EventBase::Default_Pri - 1) { // Install ourselves as the scheduler's event manager.
::sc_gem5::scheduler.setEventQueue(eventQueue()); diff --git a/src/systemc/core/scheduler.cc b/src/systemc/core/scheduler.cc index 42a2ca43b6..bcbc262119 100644 --- a/src/systemc/core/scheduler.cc +++ b/src/systemc/core/scheduler.cc @@ -44,14 +44,14 @@ namespace sc_gem5 { Scheduler::Scheduler() : - eq(nullptr), readyEvent(this, false, ReadyPriority), - pauseEvent(this, false, PausePriority), - stopEvent(this, false, StopPriority), _throwUp(nullptr), - starvationEvent(this, false, StarvationPriority), + eq(nullptr), readyEvent(*this, false, ReadyPriority), + pauseEvent(*this, false, PausePriority), + stopEvent(*this, false, StopPriority), _throwUp(nullptr), + starvationEvent(*this, false, StarvationPriority), _elaborationDone(false), _started(false), _stopNow(false), _status(StatusOther), maxTick(gem5::MaxTick), - maxTickEvent(this, false, MaxTickPriority), - timeAdvancesEvent(this, false, TimeAdvancesPriority), _numCycles(0), + maxTickEvent(*this, false, MaxTickPriority), + timeAdvancesEvent(*this, false, TimeAdvancesPriority), _numCycles(0), _changeStamp(0), _current(nullptr), initDone(false), runToTime(true), runOnce(false) {} From 99852d56876eb3b0e26ec2a15752321a4a047ebd Mon Sep 17 00:00:00 2001 From: Gabriel Busnot Date: Thu, 2 Feb 2023 16:21:57 +0000 Subject: [PATCH 260/492] sim: Deprecate pointer version of MemberEventWrapper constructor It makes no sense to initialize such event with nullptr. Favor the reference version for safer behavior. 
Change-Id: I695f41362a56aca98ceb52d49cf84be43f1465a2 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67656 Reviewed-by: Daniel Carvalho Tested-by: kokoro Maintainer: Daniel Carvalho --- src/sim/eventq.hh | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh index ff9d4bfa08..cf1734b992 100644 --- a/src/sim/eventq.hh +++ b/src/sim/eventq.hh @@ -1095,16 +1095,12 @@ class MemberEventWrapper final: public Event, public Named static_assert(std::is_same_v, std::tuple<>>); public: + [[deprecated("Use reference version of this constructor instead")]] MemberEventWrapper(CLASS *object, bool del = false, Priority p = Default_Pri): - Event(p), - Named(object->name() + ".wrapped_event"), - mObject(object) - { - gem5_assert(mObject); - if (del) setFlags(AutoDelete); - } + MemberEventWrapper{*object, del, p} + {} /** * @brief Construct a new MemberEventWrapper object @@ -1116,8 +1112,13 @@ public: MemberEventWrapper(CLASS &object, bool del = false, Priority p = Default_Pri): - MemberEventWrapper(&object, del, p) - {} + Event(p), + Named(object.name() + ".wrapped_event"), + mObject(&object) + { + if (del) setFlags(AutoDelete); + gem5_assert(mObject); + } void process() override { (mObject->*F)(); From a589d7b5697b3fbe61e1842e1831aef50aa96f32 Mon Sep 17 00:00:00 2001 From: Razeza Date: Wed, 8 Feb 2023 13:22:50 +0300 Subject: [PATCH 261/492] arch-x86: Add instructions from SSE4.1 set. The following instructions were implemented: PHMINPOSUW, ROUNDSS, ROUNDSD, EXTRACTPS, INSERTPS, PMULLD, PMULDQ, PCMPGTQ, PMINUW, PMINUD, PMINSB, MINSD, PMAXUW, PMAXUD, PMAXSB, PMAXSD, PEXTRB, PEXTRW for memory, PEXTRD, PEXTRQ, PINSRB, PINSRD, PINSRQ, PACKUSDW, PBLENDW, BLENDPS, BLENDPD, BLENDVPD, BLENDVPS, PBLENDVB, PMOVSXDQ, PMOVSXWQ, PMOVSXWD, PMOVSXBQ, PMOVSXBD, PMOVSXBW, PMOVZXDQ, PMOVZXWQ, PMOVZXWD, PMOVZXWD, PMOVZXBQ, PMOVZXBD, PMOVZXBW. 
Also fix bug in PACKUSWB_XMM_M, it was marked as sign operation, though it is unsigned. Jira Issue: https://gem5.atlassian.net/browse/GEM5-1308 Change-Id: I1a8d26c0426690841dcc80a6fa5dcffb8cbc5d9a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67737 Maintainer: Bobby Bruce Reviewed-by: Bobby Bruce Tested-by: kokoro --- src/arch/x86/insts/micromediaop.hh | 7 + src/arch/x86/insts/microop_args.hh | 15 + .../isa/decoder/three_byte_0f38_opcodes.isa | 124 +++++-- .../isa/decoder/three_byte_0f3a_opcodes.isa | 61 +++- .../compare_and_write_minimum_or_maximum.py | 20 ++ .../data_conversion/__init__.py | 1 + .../floating_point/data_conversion/round.py | 72 ++++ .../floating_point/data_transfer/move.py | 39 ++ .../simd128/integer/arithmetic/__init__.py | 1 + .../simd128/integer/arithmetic/absolute.py | 96 +++++ .../integer/arithmetic/multiplication.py | 40 +++ .../integer/compare/compare_and_write_mask.py | 20 ++ .../compare_and_write_minimum_or_maximum.py | 160 +++++++++ .../data_reordering/extract_and_insert.py | 121 +++++++ .../data_reordering/pack_with_saturation.py | 25 +- .../integer/data_reordering/shuffle.py | 120 +++++++ .../simd128/integer/data_transfer/__init__.py | 2 +- .../simd128/integer/data_transfer/move.py | 229 ++++++++++++ .../integer/data_transfer/move_with_shift.py | 59 +++ src/arch/x86/isa/microasm.isa | 2 +- src/arch/x86/isa/microops/base.isa | 12 + src/arch/x86/isa/microops/mediaop.isa | 337 ++++++++++++++++++ src/arch/x86/isa/operands.isa | 5 +- src/base/bitfield.hh | 16 + 24 files changed, 1534 insertions(+), 50 deletions(-) create mode 100644 src/arch/x86/isa/insts/simd128/floating_point/data_conversion/round.py create mode 100644 src/arch/x86/isa/insts/simd128/integer/arithmetic/absolute.py create mode 100644 src/arch/x86/isa/insts/simd128/integer/data_transfer/move_with_shift.py diff --git a/src/arch/x86/insts/micromediaop.hh b/src/arch/x86/insts/micromediaop.hh index bd897f90e2..be82429f15 100644 --- 
a/src/arch/x86/insts/micromediaop.hh +++ b/src/arch/x86/insts/micromediaop.hh @@ -40,6 +40,7 @@ namespace X86ISA enum MediaFlag { MediaMultHiOp = 1, + MediaPartHiOp = 32, MediaSignedOp = 64, MediaScalarOp = 128 }; @@ -77,6 +78,12 @@ class MediaOpBase : public X86MicroopBase return ext & MediaMultHiOp; } + bool + partHi() const + { + return ext & MediaPartHiOp; + } + bool signedOp() const { diff --git a/src/arch/x86/insts/microop_args.hh b/src/arch/x86/insts/microop_args.hh index 9dd121b3b1..c9850ca171 100644 --- a/src/arch/x86/insts/microop_args.hh +++ b/src/arch/x86/insts/microop_args.hh @@ -91,6 +91,19 @@ struct Src2Op {} }; +struct Src3Op +{ + const RegIndex src3; + const size_t size; + RegIndex opIndex() const { return src3; } + + Src3Op(RegIndex _src3, size_t _size) : src3(_src3), size(_size) {} + template + Src3Op(RegIndex _src3, InstType *inst) : src3(_src3), + size(inst->getSrcSize()) + {} +}; + struct DataOp { const RegIndex data; @@ -271,6 +284,8 @@ using FoldedSrc2Op = FoldedOp; using FloatSrc2Op = FloatOp; using IntSrc2Op = IntOp; +using FloatSrc3Op = FloatOp; + using FoldedDataOp = FoldedOp; using FloatDataOp = FloatOp; using FoldedDataHiOp = FoldedOp; diff --git a/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa b/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa index 0f4330bf7c..ea54e1578d 100644 --- a/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa +++ b/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa @@ -43,40 +43,102 @@ 0x09: psignw_Vdq_Wdq(); 0x0A: psignd_Vdq_Wdq(); 0x0B: pmulhrsw_Vdq_Wdq(); - 0x10: pblendvb_Vdq_Wdq(); - 0x14: blendvps_Vdq_Wdq(); - 0x15: blendvpd_Vdq_Wdq(); + 0x10: decode MODRM_MOD { + default: Inst::PBLENDVB(Vdq, Wdq); + } + 0x14: decode MODRM_MOD { + default: Inst::BLENDVPS(Vdq, Wdq); + } + 0x15: decode MODRM_MOD { + default: Inst::BLENDVPD(Vdq, Wdq); + } 0x17: ptest_Vdq_Wdq(); - 0x1C: pabsb_Vdq_Wdq(); - 0x1D: pabsw_Vdq_Wdq(); - 0x1E: pabsd_Vdq_Wdq(); - 0x20: pmovsxbw_Vdq_Udq_or_Mq(); - 0x21: 
pmovsxbd_Vdq_Udq_or_Md(); - 0x22: pmovsxbq_Vdq_Udq_or_Mw(); - 0x23: pmovsxwd_Vdq_Udq_or_Mq(); - 0x24: pmovsxwq_Vdq_Udq_or_Md(); - 0x25: pmovsxdq_Vdq_Udq_or_Mq(); - 0x28: pmuldq_Vdq_Wdq(); + 0x1C: decode MODRM_MOD { + default: Inst::PABSB(Vdq, Wdq); + } + 0x1D: decode MODRM_MOD { + default: Inst::PABSW(Vdq, Wdq); + } + 0x1E: decode MODRM_MOD { + default: Inst::PABSD(Vdq, Wdq); + } + 0x20: decode MODRM_MOD { + default: Inst::PMOVSXBW(Vdq, Wdq); + } + 0x21: decode MODRM_MOD { + default: Inst::PMOVSXBD(Vdq, Wdq); + } + 0x22: decode MODRM_MOD { + default: Inst::PMOVSXBQ(Vdq, Wdq); + } + 0x23: decode MODRM_MOD { + default: Inst::PMOVSXWD(Vdq, Wdq); + } + 0x24: decode MODRM_MOD { + default: Inst::PMOVSXWQ(Vdq, Wdq); + } + 0x25: decode MODRM_MOD { + default: Inst::PMOVSXDQ(Vdq, Wdq); + } + 0x28: decode MODRM_MOD { + default: Inst::PMULDQ(Vdq, Wdq); + } 0x29: pcmpeqq_Vdq_Wdq(); 0x2A: movntdqa_Vdq_Mdq(); - 0x2B: packusdw_Vdq_Wdq(); - 0x30: pmovzxbw_Vdq_Udq_or_Mq(); - 0x31: pmovzxbd_Vdq_Udq_or_Md(); - 0x32: pmovzxbq_Vdq_Udq_or_Mw(); - 0x33: pmovzxwd_Vdq_Udq_or_Mq(); - 0x34: pmovzxwq_Vdq_Udq_or_Md(); - 0x35: pmovzxdq_Vdq_Udq_or_Mq(); - 0x37: pcmpgtq_Vdq_Wdq(); - 0x38: pminsb_Vdq_Wdq(); - 0x39: pminsd_Vdq_Wdq(); - 0x3A: pminuw_Vdq_Wdq(); - 0x3B: pminud_Vdq_Wdq(); - 0x3C: pmaxsb_Vdq_Wdq(); - 0x3D: pmaxsd_Vdq_Wdq(); - 0x3E: pmaxuw_Vdq_Wdq(); - 0x3F: pmaxud_Vdq_Wdq(); - 0x40: pmulld_Vdq_Wdq(); - 0x41: phminposuw_Vdq_Wdq(); + 0x2B: decode MODRM_MOD { + default: Inst::PACKUSDW(Vdq, Wdq); + } + 0x30: decode MODRM_MOD { + default: Inst::PMOVZXBW(Vdq, Wdq); + } + 0x31: decode MODRM_MOD { + default: Inst::PMOVZXBD(Vdq, Wdq); + } + 0x32: decode MODRM_MOD { + default: Inst::PMOVZXBQ(Vdq, Wdq); + } + 0x33: decode MODRM_MOD { + default: Inst::PMOVZXWD(Vdq, Wdq); + } + 0x34: decode MODRM_MOD { + default: Inst::PMOVZXWQ(Vdq, Wdq); + } + 0x35: decode MODRM_MOD { + default: Inst::PMOVZXDQ(Vdq, Wdq); + } + 0x37: decode MODRM_MOD { + default: Inst::PCMPGTQ(Vdq, Wdq); + } + 0x38: decode MODRM_MOD 
{ + default: Inst::PMINSB(Vdq, Wdq); + } + 0x39: decode MODRM_MOD { + default: Inst::PMINSD(Vdq, Wdq); + } + 0x3A: decode MODRM_MOD { + default: Inst::PMINUW(Vdq, Wdq); + } + 0x3B: decode MODRM_MOD { + default: Inst::PMINUD(Vdq, Wdq); + } + 0x3C: decode MODRM_MOD { + default: Inst::PMAXSB(Vdq, Wdq); + } + 0x3D: decode MODRM_MOD { + default: Inst::PMAXSD(Vdq, Wdq); + } + 0x3E: decode MODRM_MOD { + default: Inst::PMAXUW(Vdq, Wdq); + } + 0x3F: decode MODRM_MOD { + default: Inst::PMAXUD(Vdq, Wdq); + } + 0x40: decode MODRM_MOD { + default: Inst::PMULLD(Vdq, Wdq); + } + 0x41: decode MODRM_MOD { + default: Inst::PHMINPOSUW(Vdq, Wdq); + } default: Inst::UD2(); } default: decode LEGACY_REPNE { diff --git a/src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa b/src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa index 0c66fa5cff..b1e84733fa 100644 --- a/src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa +++ b/src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa @@ -33,22 +33,57 @@ format WarnUnimpl { 1: decode OPCODE_OP { 0x08: roundps_Vdq_Wdq_Ib(); 0x09: roundpd_Vdq_Wdq_Ib(); - 0x0A: roundss_Vss_Wss_Ib(); - 0x0B: roundsd_Vsd_Wsd_Ib(); - 0x0C: blendps_Vdq_Wdq_Ib(); - 0x0D: blendpd_Vdq_Wdq_Ib(); - 0x0E: pblendw_Vdq_Wdq_Ib(); - 0x0F: palignr_Vdq_Wdq_Ib(); - 0x14: pextrb_Rd_or_Mb_Vdq_Ib(); + 0x0A: decode MODRM_MOD { + 0x3: Inst::ROUNDSS(Vdq, Wdq, Ib); + default: Inst::ROUNDSS(Vss, Md, Ib); + } + 0x0B: decode MODRM_MOD { + 0x3: Inst::ROUNDSD(Vss, Wdq, Ib); + default: Inst::ROUNDSD(Vss, Mq, Ib); + } + 0x0C: decode MODRM_MOD { + default: Inst::BLENDPS(Vdq, Wdq, Ib); + } + 0x0D: decode MODRM_MOD { + default: Inst::BLENDPD(Vdq, Wdq, Ib); + } + 0x0E: decode MODRM_MOD { + default: Inst::PBLENDW(Vdq, Wdq, Ib); + } + 0x0F: decode MODRM_MOD { + default: Inst::PALIGNR(Vdq, Wdq, Ib); + } + 0x14: decode MODRM_MOD { + 0x3: Inst::PEXTRB(Rd, Vdq, Ib); + default: Inst::PEXTRB(Mb, Vdq, Ib); + } 0x15: decode MODRM_MOD { 0x3: Inst::PEXTRW(Rd,Vdq,Ib); - default: pextrw_Mw_Vdq_Ib(); + default: 
Inst::PEXTRW(Mw,Vdq,Ib); + } + 0x16: decode MODRM_MOD { + default: decode REX_W { + 0x0: Inst::PEXTRD(Ed, Vdq, Ib); + 0x1: Inst::PEXTRQ(Eq, Vdq, Ib); + } + } + 0x17: decode MODRM_MOD { + default: Inst::EXTRACTPS(Ed, Vdq, Ib); + } + 0x20: decode MODRM_MOD { + 0x3: Inst::PINSRB(Vdq, Rq, Ib); + default: Inst::PINSRB(Vdq, Mb, Ib); + } + 0x21: decode MODRM_MOD { + 0x3: Inst::INSERTPS(Vdq, Wdq, Ib); + default: Inst::INSERTPS(Vdq, Md, Ib); + } + 0x22: decode MODRM_MOD { + default: decode REX_W { + 0x0: Inst::PINSRD(Vdq, Ed, Ib); + 0x1: Inst::PINSRQ(Vdq, Eq, Ib); + } } - 0x16: pextrd_pextrq_Ed_or_Eq_Vdq_Ib(); - 0x17: extractps_Ed_Vdq_Ib(); - 0x20: pinsrb_Vdq_Rd_or_Rq_or_Mb_Ib(); - 0x21: insertps_Vdq_Udq_or_Md_Ib(); - 0x22: pinsrd_pinsrq_Vdq_Ed_or_Eq_Ib(); 0x40: dpps_Vdq_Wdq_Ib(); 0x41: dppd_Vdq_Wdq_Ib(); 0x42: pcmpistrm_Vdq_Wdq_Ib(); diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py index ec9bf0e06c..e5aaf694b9 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py @@ -173,4 +173,24 @@ def macroop MAXSD_XMM_P { ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 mmaxf xmml, xmml, ufp1, ext=Scalar, size=8 }; + +def macroop PHMINPOSUW_XMM_XMM { + phminposuw xmml, xmmlm, xmmhm, size=2 + xorfp xmmh, xmmh, xmmh +}; + +def macroop PHMINPOSUW_XMM_M { + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + phminposuw xmml, ufp1, ufp2, size=2 + xorfp xmmh, xmmh, xmmh +}; + +def macroop PHMINPOSUW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + phminposuw xmml, ufp1, ufp2, size=2 + xorfp xmmh, xmmh, xmmh +}; """ diff --git 
a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py index c8a2d2f2b3..6661dc8120 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py @@ -38,6 +38,7 @@ categories = [ "convert_floating_point_to_xmm_integer", "convert_floating_point_to_mmx_integer", "convert_floating_point_to_gpr_integer", + "round", ] microcode = """ diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/round.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/round.py new file mode 100644 index 0000000000..ea2a7341d6 --- /dev/null +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/round.py @@ -0,0 +1,72 @@ +# Copyright (c) 2007 The Hewlett-Packard Development Company +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +microcode = """ +def macroop ROUNDSS_XMM_XMM_I { + rdval t1, ctrlRegIdx("misc_reg::Mxcsr") + rounds xmml, xmmlm, t1, "IMMEDIATE", size=4 +}; + +def macroop ROUNDSS_XMM_M_I { + rdval t1, ctrlRegIdx("misc_reg::Mxcsr") + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4 + rounds xmml, ufp1, t1, "IMMEDIATE", size=4 +}; + +def macroop ROUNDSS_XMM_P_I { + rdval t1, ctrlRegIdx("misc_reg::Mxcsr") + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4 + rounds xmml, ufp1, t1, "IMMEDIATE", size=4 +}; + +def macroop ROUNDSD_XMM_XMM_I { + rdval t1, ctrlRegIdx("misc_reg::Mxcsr") + rounds xmml, xmmlm, t1, "IMMEDIATE", size=8 +}; + +def macroop ROUNDSD_XMM_M_I { + rdval t1, ctrlRegIdx("misc_reg::Mxcsr") + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + rounds xmml, ufp1, t1, "IMMEDIATE", size=8 +}; + +def macroop ROUNDSD_XMM_P_I { + rdval t1, ctrlRegIdx("misc_reg::Mxcsr") + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + rounds xmml, ufp1, t1, "IMMEDIATE", size=8 +}; +""" diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py index 2a4a152c9f..607a53d828 100644 --- a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py +++ b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py @@ -276,4 +276,43 @@ def macroop MOVSD_P_XMM { def macroop MOVSD_XMM_XMM { movfp xmml, xmmlm, dataSize=8 }; + +def macroop EXTRACTPS_R_XMM_I { + extractps reg, xmmlm, "IMMEDIATE & mask(2)", size=8 + extractps reg, xmmhm, "IMMEDIATE & mask(2)", size=8, ext=PartHi +}; + +def macroop EXTRACTPS_M_XMM_I { + extractps t1, xmmlm, "IMMEDIATE & mask(2)", size=8 + extractps t1, xmmhm, "IMMEDIATE & mask(2)", size=8, ext=PartHi + st t1, seg, sib, disp +}; + +def macroop EXTRACTPS_P_XMM_I { + rdip t7 + extractps t1, xmmlm, "IMMEDIATE & mask(2)", size=8 + extractps t1, xmmhm, "IMMEDIATE & mask(2)", size=8, ext=PartHi + st t1, seg, riprel, disp +}; + +def macroop 
INSERTPS_XMM_XMM_I { + movfp ufp1, xmml, dataSize=8 + insertps xmml, xmmh, xmmlm, xmmhm, "IMMEDIATE", size=8 + insertps xmmh, ufp1, xmmlm, xmmhm, "IMMEDIATE", size=8, ext=PartHi +}; + +def macroop INSERTPS_XMM_M_I { + movfp ufp1, xmml, dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT", dataSize=4 + insertps xmml, xmmh, ufp2, ufp3, "IMMEDIATE & mask(6)", size=8 + insertps xmmh, ufp1, ufp2, ufp3, "IMMEDIATE & mask(6)", size=8, ext=PartHi +}; + +def macroop INSERTPS_XMM_P_I { + rdip t7 + movfp ufp1, xmml, dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT", dataSize=4 + insertps xmml, xmmh, ufp2, ufp3, "IMMEDIATE & mask(6)", size=8 + insertps xmmh, ufp1, ufp2, ufp3, "IMMEDIATE & mask(6)", size=8, ext=PartHi +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py index 01ae49f88e..c2de13b845 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py @@ -35,6 +35,7 @@ categories = [ "addition", + "absolute", "subtraction", "multiplication", "multiply_add", diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/absolute.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/absolute.py new file mode 100644 index 0000000000..daea1b7902 --- /dev/null +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/absolute.py @@ -0,0 +1,96 @@ +# Copyright (c) 2007 The Hewlett-Packard Development Company +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. 
You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +microcode = """ +def macroop PABSB_XMM_XMM { + pabs xmml, xmmlm, size=1 + pabs xmmh, xmmhm, size=1 +}; + +def macroop PABSB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + pabs xmml, ufp1, size=1 + pabs xmmh, ufp2, size=1 +}; + +def macroop PABSB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + pabs xmml, ufp1, size=1 + pabs xmmh, ufp2, size=1 +}; + +def macroop PABSW_XMM_XMM { + pabs xmml, xmmlm, size=2 + pabs xmmh, xmmhm, size=2 +}; + +def macroop PABSW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + pabs xmml, ufp1, size=2 + pabs xmmh, ufp2, size=2 +}; + +def macroop PABSW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + pabs xmml, ufp1, size=2 + pabs xmmh, ufp2, size=2 +}; + +def macroop PABSD_XMM_XMM { + pabs xmml, xmmlm, size=4 + pabs xmmh, xmmhm, size=4 +}; + +def macroop PABSD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + pabs xmml, ufp1, size=4 + pabs xmmh, ufp2, size=4 +}; + +def macroop PABSD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + pabs xmml, ufp1, size=4 + pabs xmmh, ufp2, size=4 +}; +""" diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py index 3246686d2c..6cdde2af57 100644 --- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py +++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py @@ -74,6 +74,26 @@ def macroop PMULLW_XMM_P { mmuli xmmh, xmmh, ufp2, size=2, ext=Signed }; +def macroop PMULLD_XMM_XMM { + mmuli xmml, xmml, xmmlm, size=4, ext=Signed + mmuli xmmh, xmmh, 
xmmhm, size=4, ext=Signed +}; + +def macroop PMULLD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, size=4, ext=Signed + mmuli xmmh, xmmh, ufp2, size=4, ext=Signed +}; + +def macroop PMULLD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, size=4, ext=Signed + mmuli xmmh, xmmh, ufp2, size=4, ext=Signed +}; + def macroop PMULHUW_XMM_XMM { mmuli xmml, xmml, xmmlm, size=2, ext = MultHi mmuli xmmh, xmmh, xmmhm, size=2, ext = MultHi @@ -113,4 +133,24 @@ def macroop PMULUDQ_XMM_P { mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar }; + +def macroop PMULDQ_XMM_XMM { + mmuli xmml, xmml, xmmlm, srcSize=4, destSize=8, ext=Scalar + "|" + Signed + mmuli xmmh, xmmh, xmmhm, srcSize=4, destSize=8, ext=Scalar + "|" + Signed +}; + +def macroop PMULDQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar + "|" + Signed + mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar + "|" + Signed +}; + +def macroop PMULDQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar + "|" + Signed + mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar + "|" + Signed +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py index 7fb4fe621f..548a00e93f 100644 --- a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py +++ b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py @@ -153,4 +153,24 @@ def macroop PCMPGTD_XMM_P { mcmpi2r xmml, 
xmml, ufp1, size=4, ext=2 mcmpi2r xmmh, xmmh, ufp2, size=4, ext=2 }; + +def macroop PCMPGTQ_XMM_XMM { + mcmpi2r xmml, xmml, xmmlm, size=8, ext=2 + mcmpi2r xmmh, xmmh, xmmhm, size=8, ext=2 +}; + +def macroop PCMPGTQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=8, ext=2 + mcmpi2r xmmh, xmmh, ufp2, size=8, ext=2 +}; + +def macroop PCMPGTQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mcmpi2r xmml, xmml, ufp1, size=8, ext=2 + mcmpi2r xmmh, xmmh, ufp2, size=8, ext=2 +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py index 7e863091a0..5793118e01 100644 --- a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py +++ b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py @@ -54,6 +54,66 @@ def macroop PMINUB_XMM_P { mmini xmmh, xmmh, ufp2, size=1, ext=0 }; +def macroop PMINUW_XMM_XMM { + mmini xmml, xmml, xmmlm, size=2, ext=0 + mmini xmmh, xmmh, xmmhm, size=2, ext=0 +}; + +def macroop PMINUW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=2, ext=0 + mmini xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PMINUW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=2, ext=0 + mmini xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PMINUD_XMM_XMM { + mmini xmml, xmml, xmmlm, size=4, ext=0 + mmini xmmh, xmmh, xmmhm, size=4, ext=0 +}; + +def macroop PMINUD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, 
ufp1, size=4, ext=0 + mmini xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PMINUD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=4, ext=0 + mmini xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PMINSB_XMM_XMM { + mmini xmml, xmml, xmmlm, size=1, ext=Signed + mmini xmmh, xmmh, xmmhm, size=1, ext=Signed +}; + +def macroop PMINSB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=1, ext=Signed + mmini xmmh, xmmh, ufp2, size=1, ext=Signed +}; + +def macroop PMINSB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=1, ext=Signed + mmini xmmh, xmmh, ufp2, size=1, ext=Signed +}; + def macroop PMINSW_XMM_XMM { mmini xmml, xmml, xmmlm, size=2, ext=Signed mmini xmmh, xmmh, xmmhm, size=2, ext=Signed @@ -74,6 +134,26 @@ def macroop PMINSW_XMM_P { mmini xmmh, xmmh, ufp2, size=2, ext=Signed }; +def macroop PMINSD_XMM_XMM { + mmini xmml, xmml, xmmlm, size=4, ext=Signed + mmini xmmh, xmmh, xmmhm, size=4, ext=Signed +}; + +def macroop PMINSD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=4, ext=Signed + mmini xmmh, xmmh, ufp2, size=4, ext=Signed +}; + +def macroop PMINSD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmini xmml, xmml, ufp1, size=4, ext=Signed + mmini xmmh, xmmh, ufp2, size=4, ext=Signed +}; + def macroop PMAXUB_XMM_XMM { mmaxi xmml, xmml, xmmlm, size=1, ext=0 mmaxi xmmh, xmmh, xmmhm, size=1, ext=0 @@ -94,6 +174,66 @@ def macroop PMAXUB_XMM_P { mmaxi xmmh, xmmh, ufp2, size=1, ext=0 }; +def macroop PMAXUW_XMM_XMM { + mmaxi xmml, xmml, xmmlm, size=2, ext=0 + mmaxi xmmh, xmmh, 
xmmhm, size=2, ext=0 +}; + +def macroop PMAXUW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=2, ext=0 + mmaxi xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PMAXUW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=2, ext=0 + mmaxi xmmh, xmmh, ufp2, size=2, ext=0 +}; + +def macroop PMAXUD_XMM_XMM { + mmaxi xmml, xmml, xmmlm, size=4, ext=0 + mmaxi xmmh, xmmh, xmmhm, size=4, ext=0 +}; + +def macroop PMAXUD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=4, ext=0 + mmaxi xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PMAXUD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=4, ext=0 + mmaxi xmmh, xmmh, ufp2, size=4, ext=0 +}; + +def macroop PMAXSB_XMM_XMM { + mmaxi xmml, xmml, xmmlm, size=1, ext=Signed + mmaxi xmmh, xmmh, xmmhm, size=1, ext=Signed +}; + +def macroop PMAXSB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=1, ext=Signed + mmaxi xmmh, xmmh, ufp2, size=1, ext=Signed +}; + +def macroop PMAXSB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=1, ext=Signed + mmaxi xmmh, xmmh, ufp2, size=1, ext=Signed +}; + def macroop PMAXSW_XMM_XMM { mmaxi xmml, xmml, xmmlm, size=2, ext=Signed mmaxi xmmh, xmmh, xmmhm, size=2, ext=Signed @@ -113,4 +253,24 @@ def macroop PMAXSW_XMM_P { mmaxi xmml, xmml, ufp1, size=2, ext=Signed mmaxi xmmh, xmmh, ufp2, size=2, ext=Signed }; + +def macroop PMAXSD_XMM_XMM { + mmaxi xmml, xmml, xmmlm, size=4, ext=Signed + mmaxi xmmh, 
xmmh, xmmhm, size=4, ext=Signed +}; + +def macroop PMAXSD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=4, ext=Signed + mmaxi xmmh, xmmh, ufp2, size=4, ext=Signed +}; + +def macroop PMAXSD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + mmaxi xmml, xmml, ufp1, size=4, ext=Signed + mmaxi xmmh, xmmh, ufp2, size=4, ext=Signed +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py index f955cbaa16..8d14aa296e 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py @@ -34,11 +34,96 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. microcode = """ +def macroop PEXTRB_R_XMM_I { + mov2int reg, xmmlm, "IMMEDIATE & mask(4)", size=1, ext=1 + mov2int reg, xmmhm, "IMMEDIATE & mask(4)", size=1, ext=1 +}; + +def macroop PEXTRB_M_XMM_I { + mov2int t1, xmmlm, "IMMEDIATE & mask(4)", size=1, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(4)", size=1, ext=1 + st t1, seg, sib, disp, dataSize=1 +}; + +def macroop PEXTRB_P_XMM_I { + rdip t7 + mov2int t1, xmmlm, "IMMEDIATE & mask(4)", size=1, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(4)", size=1, ext=1 + st t1, seg, riprel, disp, dataSize=1 +}; + def macroop PEXTRW_R_XMM_I { mov2int reg, xmmlm, "IMMEDIATE & mask(3)", size=2, ext=1 mov2int reg, xmmhm, "IMMEDIATE & mask(3)", size=2, ext=1 }; +def macroop PEXTRW_M_XMM_I { + mov2int t1, xmmlm, "IMMEDIATE & mask(3)", size=2, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(3)", size=2, ext=1 + st t1, seg, sib, disp, dataSize=2 +}; + +def macroop PEXTRW_P_XMM_I { + rdip t7 + mov2int t1, xmmlm, "IMMEDIATE & mask(3)", size=2, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & 
mask(3)", size=2, ext=1 + st t1, seg, riprel, disp, dataSize=2 +}; + +def macroop PEXTRD_R_XMM_I { + mov2int reg, xmmlm, "IMMEDIATE & mask(2)", size=4, ext=1 + mov2int reg, xmmhm, "IMMEDIATE & mask(2)", size=4, ext=1 +}; + +def macroop PEXTRD_M_XMM_I { + mov2int t1, xmmlm, "IMMEDIATE & mask(2)", size=4, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(2)", size=4, ext=1 + st t1, seg, sib, disp, dataSize=4 +}; + +def macroop PEXTRD_P_XMM_I { + rdip t7 + mov2int t1, xmmlm, "IMMEDIATE & mask(2)", size=4, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(2)", size=4, ext=1 + st t1, seg, riprel, disp, dataSize=4 +}; + +def macroop PEXTRQ_R_XMM_I { + mov2int reg, xmmlm, "IMMEDIATE & mask(1)", size=8, ext=1 + mov2int reg, xmmhm, "IMMEDIATE & mask(1)", size=8, ext=1 +}; + +def macroop PEXTRQ_M_XMM_I { + mov2int t1, xmmlm, "IMMEDIATE & mask(1)", size=8, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(1)", size=8, ext=1 + st t1, seg, sib, disp, dataSize=8 +}; + +def macroop PEXTRQ_P_XMM_I { + rdip t7 + mov2int t1, xmmlm, "IMMEDIATE & mask(1)", size=8, ext=1 + mov2int t1, xmmhm, "IMMEDIATE & mask(1)", size=8, ext=1 + st t1, seg, riprel, disp, dataSize=8 +}; + +def macroop PINSRB_XMM_R_I { + mov2fp xmml, regm, "IMMEDIATE & mask(4)", size=1, ext=1 + mov2fp xmmh, regm, "IMMEDIATE & mask(4)", size=1, ext=1 +}; + +def macroop PINSRB_XMM_M_I { + ld t1, seg, sib, disp, dataSize=1 + mov2fp xmml, t1, "IMMEDIATE & mask(4)", size=1, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(4)", size=1, ext=1 +}; + +def macroop PINSRB_XMM_P_I { + rdip t7 + ld t1, seg, riprel, disp, dataSize=1 + mov2fp xmml, t1, "IMMEDIATE & mask(4)", size=1, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(4)", size=1, ext=1 +}; + def macroop PINSRW_XMM_R_I { mov2fp xmml, regm, "IMMEDIATE & mask(3)", size=2, ext=1 mov2fp xmmh, regm, "IMMEDIATE & mask(3)", size=2, ext=1 @@ -56,4 +141,40 @@ def macroop PINSRW_XMM_P_I { mov2fp xmml, t1, "IMMEDIATE & mask(3)", size=2, ext=1 mov2fp xmmh, t1, "IMMEDIATE & mask(3)", size=2, ext=1 }; + +def 
macroop PINSRD_XMM_R_I { + mov2fp xmml, regm, "IMMEDIATE & mask(2)", size=4, ext=1 + mov2fp xmmh, regm, "IMMEDIATE & mask(2)", size=4, ext=1 +}; + +def macroop PINSRD_XMM_M_I { + ld t1, seg, sib, disp, dataSize=4 + mov2fp xmml, t1, "IMMEDIATE & mask(2)", size=4, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(2)", size=4, ext=1 +}; + +def macroop PINSRD_XMM_P_I { + rdip t7 + ld t1, seg, riprel, disp, dataSize=4 + mov2fp xmml, t1, "IMMEDIATE & mask(2)", size=4, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(2)", size=4, ext=1 +}; + +def macroop PINSRQ_XMM_R_I { + mov2fp xmml, regm, "IMMEDIATE & mask(1)", size=8, ext=1 + mov2fp xmmh, regm, "IMMEDIATE & mask(1)", size=8, ext=1 +}; + +def macroop PINSRQ_XMM_M_I { + ld t1, seg, sib, disp, dataSize=8 + mov2fp xmml, t1, "IMMEDIATE & mask(1)", size=8, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(1)", size=8, ext=1 +}; + +def macroop PINSRQ_XMM_P_I { + rdip t7 + ld t1, seg, riprel, disp, dataSize=8 + mov2fp xmml, t1, "IMMEDIATE & mask(1)", size=8, ext=1 + mov2fp xmmh, t1, "IMMEDIATE & mask(1)", size=8, ext=1 +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py index 2307ecfcef..7457e1fa85 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py @@ -76,6 +76,27 @@ def macroop PACKSSWB_XMM_P { pack xmmh, ufp1, ufp2, ext=Signed, srcSize=2, destSize=1 }; +def macroop PACKUSDW_XMM_XMM { + pack ufp1, xmml, xmmh, ext=0, srcSize=4, destSize=2 + pack xmmh, xmmlm, xmmhm, ext=0, srcSize=4, destSize=2 + movfp xmml, ufp1, dataSize=8 +}; + +def macroop PACKUSDW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + pack xmml, xmml, xmmh, ext=0, srcSize=4, destSize=2 + pack xmmh, ufp1, ufp2, ext=0, srcSize=4, destSize=2 +}; + +def macroop 
PACKUSDW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + pack xmml, xmml, xmmh, ext=0, srcSize=4, destSize=2 + pack xmmh, ufp1, ufp2, ext=0, srcSize=4, destSize=2 +}; + def macroop PACKUSWB_XMM_XMM { pack ufp1, xmml, xmmh, ext=0, srcSize=2, destSize=1 pack xmmh, xmmlm, xmmhm, ext=0, srcSize=2, destSize=1 @@ -85,8 +106,8 @@ def macroop PACKUSWB_XMM_XMM { def macroop PACKUSWB_XMM_M { ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 - pack xmml, xmml, xmmh, ext=Signed, srcSize=2, destSize=1 - pack xmmh, ufp1, ufp2, ext=Signed, srcSize=2, destSize=1 + pack xmml, xmml, xmmh, ext=0, srcSize=2, destSize=1 + pack xmmh, ufp1, ufp2, ext=0, srcSize=2, destSize=1 }; def macroop PACKUSWB_XMM_P { diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py index 0fe09b6d37..946d59f6b5 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py @@ -112,4 +112,124 @@ def macroop PSHUFB_XMM_P { movfp xmml, ufp1, dataSize=8 movfp xmmh, ufp2, dataSize=8 }; + +def macroop PBLENDW_XMM_XMM_I { + blend xmml, xmmlm, "IMMEDIATE & mask(8)", size=2, ext=0 + blend xmmh, xmmhm, "IMMEDIATE & mask(8)", size=2, ext=1 +}; + +def macroop PBLENDW_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + blend xmml, ufp1, "IMMEDIATE & mask(8)", size=2, ext=0 + blend xmmh, ufp2, "IMMEDIATE & mask(8)", size=2, ext=1 +}; + +def macroop PBLENDW_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + blend xmml, ufp1, "IMMEDIATE & mask(8)", size=2, ext=0 + blend xmmh, ufp2, "IMMEDIATE & mask(8)", size=2, ext=1 +}; + +def macroop BLENDPS_XMM_XMM_I { + blend xmml, xmmlm, 
"IMMEDIATE & mask(4)", size=4, ext=0 + blend xmmh, xmmhm, "IMMEDIATE & mask(4)", size=4, ext=1 +}; + +def macroop BLENDPS_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + blend xmml, ufp1, "IMMEDIATE & mask(4)", size=4, ext=0 + blend xmmh, ufp2, "IMMEDIATE & mask(4)", size=4, ext=1 +}; + +def macroop BLENDPS_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + blend xmml, ufp1, "IMMEDIATE & mask(4)", size=4, ext=0 + blend xmmh, ufp2, "IMMEDIATE & mask(4)", size=4, ext=1 +}; + +def macroop BLENDPD_XMM_XMM_I { + blend xmml, xmmlm, "IMMEDIATE & mask(2)", size=8, ext=0 + blend xmmh, xmmhm, "IMMEDIATE & mask(2)", size=8, ext=1 +}; + +def macroop BLENDPD_XMM_M_I { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + blend xmml, ufp1, "IMMEDIATE & mask(2)", size=8, ext=0 + blend xmmh, ufp2, "IMMEDIATE & mask(2)", size=8, ext=1 +}; + +def macroop BLENDPD_XMM_P_I { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + blend xmml, ufp1, "IMMEDIATE & mask(2)", size=8, ext=0 + blend xmmh, ufp2, "IMMEDIATE & mask(2)", size=8, ext=1 +}; + +def macroop BLENDVPD_XMM_XMM { + blendxmm xmml, xmmlm, fpRegIdx("float_reg::xmmLow(0)"), size=8 + blendxmm xmmh, xmmhm, fpRegIdx("float_reg::xmmHigh(0)"), size=8 +}; + +def macroop BLENDVPD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=8 + blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=8 +}; + +def macroop BLENDVPD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=8 + blendxmm xmmh, ufp2, 
fpRegIdx("float_reg::xmmHigh(0)"), size=8 +}; + +def macroop BLENDVPS_XMM_XMM { + blendxmm xmml, xmmlm, fpRegIdx("float_reg::xmmLow(0)"), size=4 + blendxmm xmmh, xmmhm, fpRegIdx("float_reg::xmmHigh(0)"), size=4 +}; + +def macroop BLENDVPS_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=4 + blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=4 +}; + +def macroop BLENDVPS_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=4 + blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=4 +}; + +def macroop PBLENDVB_XMM_XMM { + blendxmm xmml, xmmlm, fpRegIdx("float_reg::xmmLow(0)"), size=1 + blendxmm xmmh, xmmhm, fpRegIdx("float_reg::xmmHigh(0)"), size=1 +}; + +def macroop PBLENDVB_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8 + blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=1 + blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=1 +}; + +def macroop PBLENDVB_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=1 + blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=1 +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py index a539b156fc..4117b59325 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py @@ -33,7 +33,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-categories = ["move", "move_non_temporal", "move_mask"] +categories = ["move", "move_non_temporal", "move_mask", "move_with_shift"] microcode = """ # 128 bit multimedia and scientific data transfer instructions diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py index 576b5dc81c..fb2055ec97 100644 --- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py +++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py @@ -132,4 +132,233 @@ def macroop LDDQU_XMM_P { ldfp xmml, seg, sib, "DISPLACEMENT", dataSize=8 ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8 }; + +def macroop PMOVSXDQ_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=4, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=4, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXDQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=4, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=4, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXDQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=4, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=4, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXWQ_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=2, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=2, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXWQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=8, srcSize=2, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=2, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXWQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=8, srcSize=2, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=2, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXWD_XMM_XMM { 
+ movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=2, ext = Signed + extmove xmmh, ufp1, destSize=4, srcSize=2, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXWD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=2, ext = Signed + extmove xmmh, ufp1, destSize=4, srcSize=2, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXWD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=2, ext = Signed + extmove xmmh, ufp1, destSize=4, srcSize=2, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBQ_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=2 + extmove xmml, ufp1, destSize=8, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=2 + extmove xmml, ufp1, destSize=8, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=8, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBD_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=4, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=4, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=4, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=4, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=4, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBW_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, 
destSize=2, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=2, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=2, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=2, srcSize=1, ext = Signed + "|" + PartHi +}; + +def macroop PMOVSXBW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=2, srcSize=1, ext = Signed + extmove xmmh, ufp1, destSize=2, srcSize=1, ext = Signed + "|" + PartHi +}; + + +def macroop PMOVZXDQ_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=4 + extmove xmmh, ufp1, destSize=8, srcSize=4, ext=PartHi +}; + +def macroop PMOVZXDQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=4 + extmove xmmh, ufp1, destSize=8, srcSize=4, ext=PartHi +}; + +def macroop PMOVZXDQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=4 + extmove xmmh, ufp1, destSize=8, srcSize=4, ext=PartHi +}; + +def macroop PMOVZXWQ_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=2 + extmove xmmh, ufp1, destSize=8, srcSize=2, ext=PartHi +}; + +def macroop PMOVZXWQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=8, srcSize=2 + extmove xmmh, ufp1, destSize=8, srcSize=2, ext=PartHi +}; + +def macroop PMOVZXWQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=8, srcSize=2 + extmove xmmh, ufp1, destSize=8, srcSize=2, ext=PartHi +}; + +def macroop PMOVZXWD_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=2 + extmove xmmh, ufp1, destSize=4, srcSize=2, ext=PartHi +}; + +def macroop PMOVZXWD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=2 + extmove xmmh, ufp1, 
destSize=4, srcSize=2, ext=PartHi +}; + +def macroop PMOVZXWD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=2 + extmove xmmh, ufp1, destSize=4, srcSize=2, ext=PartHi +}; + +def macroop PMOVZXBQ_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=8, srcSize=1 + extmove xmmh, ufp1, destSize=8, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBQ_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=2 + extmove xmml, ufp1, destSize=8, srcSize=1 + extmove xmmh, ufp1, destSize=8, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBQ_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=2 + extmove xmml, ufp1, destSize=8, srcSize=1 + extmove xmmh, ufp1, destSize=8, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBD_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=4, srcSize=1 + extmove xmmh, ufp1, destSize=4, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBD_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=4, srcSize=1 + extmove xmmh, ufp1, destSize=4, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBD_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4 + extmove xmml, ufp1, destSize=4, srcSize=1 + extmove xmmh, ufp1, destSize=4, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBW_XMM_XMM { + movfp ufp1, xmmlm, dataSize=8 + extmove xmml, ufp1, destSize=2, srcSize=1 + extmove xmmh, ufp1, destSize=2, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBW_XMM_M { + ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=2, srcSize=1 + extmove xmmh, ufp1, destSize=2, srcSize=1, ext=PartHi +}; + +def macroop PMOVZXBW_XMM_P { + rdip t7 + ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8 + extmove xmml, ufp1, destSize=2, srcSize=1 + extmove xmmh, ufp1, destSize=2, srcSize=1, ext=PartHi +}; """ diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_with_shift.py 
b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_with_shift.py new file mode 100644 index 0000000000..155e46b2ea --- /dev/null +++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_with_shift.py @@ -0,0 +1,59 @@ +# Copyright (c) 2007 The Hewlett-Packard Development Company +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.1 + +microcode = """ +def macroop PALIGNR_XMM_XMM_I { + movfp ufp1, xmml, dataSize=8 + palignr xmml, xmmh, xmmlm, xmmhm, "IMMEDIATE", size=8 + palignr xmmh, ufp1, xmmlm, xmmhm, "IMMEDIATE", size=8, ext=PartHi +}; + +def macroop PALIGNR_XMM_M_I { + ldfp ufp2, seg, sib, "DISPLACEMENT", dataSize=8 + ldfp ufp3, seg, sib, "DISPLACEMENT + 8", dataSize=8 + movfp ufp1, xmml, dataSize=8 + palignr xmml, xmmh, ufp2, ufp3, "IMMEDIATE", size=8 + palignr xmmh, ufp1, ufp2, ufp3, "IMMEDIATE", size=8, ext=PartHi +}; + +def macroop PALIGNR_XMM_P_I { + rdip t7 + ldfp ufp2, seg, riprel, "DISPLACEMENT", dataSize=8 + ldfp ufp3, seg, riprel, "DISPLACEMENT + 8", dataSize=8 + movfp ufp1, xmml, dataSize=8 + palignr xmml, xmmh, ufp2, ufp3, "IMMEDIATE", size=8 + palignr xmmh, ufp1, ufp2, ufp3, "IMMEDIATE", size=8, ext=PartHi +}; +""" diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa index 632cb07c92..a5ff1d179f 100644 --- a/src/arch/x86/isa/microasm.isa +++ b/src/arch/x86/isa/microasm.isa @@ -187,7 +187,7 @@ let {{ assembler.symbols[reg] = \ ctrlRegIdx(f"misc_reg::{reg.capitalize()}") - for flag in ('Scalar', 'MultHi', 'Signed'): + for flag in ('Scalar', 'MultHi', 'Signed', 'PartHi'): assembler.symbols[flag] = 'Media%sOp' % flag # Code literal which forces a default 64 bit operand size in 64 bit mode. 
diff --git a/src/arch/x86/isa/microops/base.isa b/src/arch/x86/isa/microops/base.isa index aded50b472..a4fee547b6 100644 --- a/src/arch/x86/isa/microops/base.isa +++ b/src/arch/x86/isa/microops/base.isa @@ -105,6 +105,8 @@ let {{ idx_name = 'Src1' class Src2Op(object): idx_name = 'Src2' + class Src3Op(object): + idx_name = 'Src3' class RegisterOp(object): def __init__(self, it): @@ -189,6 +191,9 @@ let {{ class IntSrc2Op(IntOp, Src2Op, Operand): pass + class FloatSrc3Op(FloatOp, Src3Op, Operand): + pass + class Op2(object): @classmethod def isDual(cls): @@ -198,6 +203,13 @@ let {{ FloatType = FloatSrc2Op ImmType = Imm8Op + class Op3(object): + @classmethod + def isDual(cls): + return False + + FloatType = FloatSrc3Op + class X86Microop(object): generatorNameTemplate = "generate_%s_%d" diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa index 5fcf1d378b..599b5faef5 100644 --- a/src/arch/x86/isa/microops/mediaop.isa +++ b/src/arch/x86/isa/microops/mediaop.isa @@ -1554,4 +1554,341 @@ let {{ super().__init__(size=2) op_class = 'FloatMiscOp' code = 'FTW = 0xFFFF;' + + class Blend(Media3Op): + def __init__(self, dest, src1, src2=0, **kwargs): + super().__init__(dest, src1, src2, **kwargs) + operand_types = (FloatDestOp, FloatSrc1Op, Imm8Op) + op_class = 'SimdMiscOp' + code = ''' + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = sizeof(double) / size; + int offset = ext ? items : 0; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + + if (bits(imm8, i + offset)) { + uint64_t resBits = bits(FpSrcReg1_uqw, hiIndex, loIndex); + FpDestReg_uqw = + insertBits(FpDestReg_uqw, hiIndex, loIndex, resBits); + } else { + // do nothing + } + } + ''' + + class Extmove(Media2Op): + op_class = 'SimdMiscOp' + code = ''' + int items = sizeof(double) / destSize; + int offset = partHi() ? 
items : 0; + int srcBits = srcSize * 8; + int destBits = destSize * 8; + + for (int i = 0; i < items; i++) { + int hiIndexSrc = (i + offset + 1) * srcBits - 1; + int loIndexSrc = (i + offset + 0) * srcBits; + uint64_t resBits = bits(FpSrcReg1_uqw, hiIndexSrc, loIndexSrc); + if (signedOp()) { + resBits = sext(resBits, srcBits); + } else { + // do nothing, already zero-extended + } + + int hiIndexDest = (i + 1) * destBits - 1; + int loIndexDest = (i + 0) * destBits; + FpDestReg_uqw = + insertBits(FpDestReg_uqw, hiIndexDest, + loIndexDest, resBits); + } + ''' + + class Blendxmm(Media3Op): + op_class = 'SimdMiscOp' + operand_types = (FloatDestOp, FloatSrc1Op, FloatSrc2Op) + code = ''' + int size = srcSize; + int sizeBits = size * 8; + int items = sizeof(double) / size; + + for (int i = 0; i < items; i++) { + + int maskBit = bits(FpSrcReg2_uqw, (i + 1) * sizeBits - 1); + + if (maskBit) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t resBits = bits(FpSrcReg1_uqw, hiIndex, loIndex); + FpDestReg_uqw = + insertBits(FpDestReg_uqw, hiIndex, loIndex, resBits); + } else { + // do nothing + } + } + ''' + + + class Palignr(MediaOp): + operand_types = (FloatDestOp, FloatSrc1Op, + FloatSrc2Op, FloatSrc3Op, Imm8Op) + def __init__(self, op1, op2, op3, op4, op5, **kwargs): + super().__init__(op1, op2, op3, op4, op5, **kwargs) + op_class = 'SimdMiscOp' + code = ''' + int zeroRegisterBytes = partHi() ? 24 : 32; + if (imm8 >= zeroRegisterBytes) { + FpDestReg_uqw = 0; + } else if (imm8 == 16) { + FpDestReg_uqw = FpDestReg_uqw; + } else if (imm8 == 8) { + FpDestReg_uqw = partHi() ? FpSrcReg1_uqw : FpSrcReg3_uqw; + } else if (imm8 == 0) { + FpDestReg_uqw = partHi() ? FpSrcReg3_uqw : FpSrcReg2_uqw; + } else { + int shift = imm8 * 8; + uint64_t firstFpSrcReg; + uint64_t secondFpSrcReg; + if (imm8 < 8) { + firstFpSrcReg = partHi() ? FpSrcReg3_uqw : FpSrcReg2_uqw; + secondFpSrcReg = partHi() ? 
FpSrcReg1_uqw : FpSrcReg3_uqw; + } else if (imm8 < 16) { + shift -=64; + firstFpSrcReg = partHi() ? FpSrcReg1_uqw : FpSrcReg3_uqw; + secondFpSrcReg = partHi() ? FpDestReg_uqw : FpDestReg_uqw; + } else if (imm8 < 24) { + shift = partHi() ? 192 - shift : shift - 128; + firstFpSrcReg = partHi() ? 0 : FpDestReg_uqw; + secondFpSrcReg = partHi() ? + FpDestReg_uqw >> (64 - shift) : FpSrcReg1_uqw; + } else { // < 32 + shift = 256 - shift; + firstFpSrcReg = 0; + secondFpSrcReg = FpSrcReg1_uqw >> (64 - shift); + } + + FpDestReg_uqw = firstFpSrcReg >> shift; + int hiIndex = 63; + int loIndex = 64 - shift; + FpDestReg_uqw = insertBits(FpDestReg_uqw, hiIndex, + loIndex, secondFpSrcReg); + } + ''' + + class Extractps(Media3Op): + op_class = 'SimdMiscOp' + operand_types = (IntDestOp, FloatSrc1Op, Imm8Op) + code = ''' + if (imm8 < 2 && !partHi()) { + DestReg = (FpSrcReg1_uqw >> (32 * imm8)) & 0xFFFFFFFF; + } else if (imm8 >= 2 && partHi()) { + int shift = imm8 - 2; + DestReg = (FpSrcReg1_uqw >> (32 * shift)) & 0xFFFFFFFF; + } + ''' + + class Phminposuw(Media3Op): + op_class = 'SimdMiscOp' + code = ''' + int minIndex = 0; + uint64_t min = 0xFFFFF; + int destBits = destSize * 8; + + for (int i = 0; i < 128; i += destBits) { + uint64_t FpSrcReg = i < 64 ? 
FpSrcReg1_uqw : FpSrcReg2_uqw; + uint16_t value = bits(FpSrcReg, i + destBits - 1, i); + if (value < min) { + min = value; + minIndex = i / destBits; + } + } + + FpDestReg_uqw = min; + FpDestReg_uqw = insertBits(FpDestReg_uqw, 64, destBits, minIndex); + ''' + + class Insertps(MediaOp): + operand_types = (FloatDestOp, FloatSrc1Op, + FloatSrc2Op, FloatSrc3Op, Imm8Op) + def __init__(self, op1, op2, op3, op4, op5, **kwargs): + super().__init__(op1, op2, op3, op4, op5, **kwargs) + op_class = 'SimdMiscOp' + code = ''' + int countS = bits(imm8, 7, 6); + int countD = bits(imm8, 5, 4); + int zmask = bits(imm8, 3, 0); + + uint64_t tmp = 0; + switch (countS) { + case 0: { + tmp = bits(FpSrcReg2_uqw, 31, 0); + break; + } + case 1: { + tmp = bits(FpSrcReg2_uqw, 63, 32); + break; + } + case 2: { + tmp = bits(FpSrcReg3_uqw, 31, 0); + break; + } + case 3: { + tmp = bits(FpSrcReg3_uqw, 63, 32); + break; + } + } + + uint64_t tmp2l = partHi() ? FpSrcReg1_uqw : FpDestReg_uqw; + uint64_t tmp2h = partHi() ? FpDestReg_uqw : FpSrcReg1_uqw; + + switch (countD) { + case 0: { + tmp2l = insertBits(tmp2l, 31, 0, tmp); + break; + } + case 1: { + tmp2l = insertBits(tmp2l, 63, 32, tmp); + break; + } + case 2: { + tmp2h = insertBits(tmp2h, 31, 0, tmp); + break; + } + case 3: { + tmp2h = insertBits(tmp2h, 63, 32, tmp); + break; + } + } + + if (!partHi()) { + if (bits(zmask, 0)) { + tmp2l = insertBits(tmp2l, 31, 0, 0); + } + if (bits(zmask, 1)) { + tmp2l = insertBits(tmp2l, 63, 32, 0); + } + FpDestReg_uqw = tmp2l; + } else { + if (bits(zmask, 2)) { + tmp2h = insertBits(tmp2h, 31, 0, 0); + } + if (bits(zmask, 3)) { + tmp2h = insertBits(tmp2h, 63, 32, 0); + } + FpDestReg_uqw = tmp2h; + } + + ''' + + class Rounds(MediaOp): + operand_types = (FloatDestOp, FloatSrc1Op, IntSrc2Op, Imm8Op) + def __init__(self, op1, op2, op3, op4, **kwargs): + super().__init__(op1, op2, op3, op4, **kwargs) + op_class = 'SimdMiscOp' + code = ''' + bool isMXCSR = bits(imm8, 2); + int roundingMode = 0; + if (isMXCSR) { + 
roundingMode = bits(imm8, 14, 13); + } else { + roundingMode = bits(imm8, 1, 0); + } + + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + double arg; + if (srcSize == 4) { + floatInt fi; + fi.i = bits(FpSrcReg1_uqw, 31, 0); + arg = fi.f; + } else { + doubleInt di; + di.i = bits(FpSrcReg1_uqw, 63, 0); + arg = di.d; + } + + switch (roundingMode) { + case 0: { + // to nearest + arg = std::round(arg); + break; + } + case 1: { + // down + arg = std::floor(arg); + break; + } + case 2: { + // up + arg = std::ceil(arg); + break; + } + case 3: { + // to 0 + arg = std::trunc(arg); + break; + } + } + + int destHiIndex = 0; + uint64_t argBits = 0; + if (destSize == 4) { + floatInt convertBack; + convertBack.f = arg; + argBits = convertBack.i; + destHiIndex = 31; + } else { + doubleInt convertBack; + convertBack.d = arg; + argBits = convertBack.i; + destHiIndex = 63; + } + FpDestReg_uqw = insertBits(FpDestReg_uqw, destHiIndex, 0, argBits); + ''' + + class Pabs(Media2Op): + op_class = 'SimdMiscOp' + code = ''' + int size = srcSize; + int sizeBits = size * 8; + int items = numItems(size); + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex); + if (bits(arg1Bits, sizeBits - 1)) { + if (size == 1) { + uint8_t tmpBits = (uint8_t)arg1Bits; + tmpBits = ~tmpBits + 1; + arg1Bits = tmpBits; + } else if (size == 2) { + uint16_t tmpBits = (uint16_t)arg1Bits; + tmpBits = ~tmpBits + 1; + arg1Bits = tmpBits; + } else if (size == 4) { + uint32_t tmpBits = (uint32_t)arg1Bits; + tmpBits = ~tmpBits + 1; + arg1Bits = tmpBits; + } else { + assert(false); + } + } + + FpDestReg_uqw = + insertBits(FpDestReg_uqw, hiIndex, loIndex, arg1Bits); + } + ''' }}; diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa index d8bc947085..ef635c50cc 100644 --- a/src/arch/x86/isa/operands.isa +++ 
b/src/arch/x86/isa/operands.isa @@ -165,8 +165,9 @@ def operands {{ 'R9': IntReg('X86ISA::int_reg::R9', 21), 'FpSrcReg1': FloatReg('src1', 22), 'FpSrcReg2': FloatReg('src2', 23), - 'FpDestReg': FloatReg('dest', 24), - 'FpData': FloatReg('data', 25), + 'FpSrcReg3': FloatReg('src3', 24), + 'FpDestReg': FloatReg('dest', 25), + 'FpData': FloatReg('data', 26), 'RIP': PCStateOp('uqw', 'pc', (None, None, 'IsControl'), 50), 'NRIP': PCStateOp('uqw', 'npc', diff --git a/src/base/bitfield.hh b/src/base/bitfield.hh index 288c5cabe9..eecea02981 100644 --- a/src/base/bitfield.hh +++ b/src/base/bitfield.hh @@ -131,6 +131,22 @@ sext(uint64_t val) return val; } +/** + * Sign-extend an N-bit value to 64 bits. Assumes all bits past the sign are + * currently zero. For true sign extension regardless of the value of the sign + * bit, see szext. + * + * @ingroup api_bitfield + */ +constexpr uint64_t +sext(uint64_t val, int N) +{ + bool sign_bit = bits(val, N - 1); + if (sign_bit) + val |= ~mask(N); + return val; +} + /** * Sign-extend an N-bit value to 64 bits. Zero any bits past the sign if * necessary. 
From fa0795ff5b923e753823abaff1ef46d11f4cc518 Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Tue, 14 Mar 2023 11:19:51 +0800 Subject: [PATCH 262/492] arch-riscv: Add new misa bit union The new misa bit union type can help get and set misa CSR more clearly Change-Id: Id48b140968a0e8021b09782815aa612b409ac75b Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68917 Reviewed-by: Bobby Bruce Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Hoa Nguyen --- src/arch/riscv/isa.cc | 43 ++++++++++++++++++++++++------------- src/arch/riscv/regs/misc.hh | 31 ++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index 7964de51ec..d744fe369b 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -287,21 +287,33 @@ void ISA::clear() miscRegFile[MISCREG_VENDORID] = 0; miscRegFile[MISCREG_ARCHID] = 0; miscRegFile[MISCREG_IMPID] = 0; + + MISA misa = 0; + STATUS status = 0; + + // default config arch isa string is rv64(32)imafdc + misa.rvi = misa.rvm = misa.rva = misa.rvf = misa.rvd = misa.rvc = 1; + // default privlege modes if MSU + misa.rvs = misa.rvu = 1; + + // mark FS is initial + status.fs = INITIAL; + // rv_type dependent init. 
switch (rv_type) { case RV32: - miscRegFile[MISCREG_ISA] = (1ULL << MXL_OFFSETS[RV32]) | 0x14112D; - miscRegFile[MISCREG_STATUS] = (1ULL << FS_OFFSET); - break; + misa.rv32_mxl = 1; + break; case RV64: - miscRegFile[MISCREG_ISA] = (2ULL << MXL_OFFSETS[RV64]) | 0x14112D; - miscRegFile[MISCREG_STATUS] = (2ULL << UXL_OFFSET) | - (2ULL << SXL_OFFSET) | - (1ULL << FS_OFFSET); - break; + misa.rv64_mxl = 2; + status.uxl = status.sxl = 2; + break; default: - panic("%s: Unknown rv_type: %d", name(), (int)rv_type); + panic("%s: Unknown rv_type: %d", name(), (int)rv_type); } + + miscRegFile[MISCREG_ISA] = misa; + miscRegFile[MISCREG_STATUS] = status; miscRegFile[MISCREG_MCOUNTEREN] = 0x7; miscRegFile[MISCREG_SCOUNTEREN] = 0x7; // don't set it to zero; software may try to determine the supported @@ -425,10 +437,10 @@ ISA::readMiscReg(RegIndex idx) case MISCREG_SEPC: case MISCREG_MEPC: { - auto misa = readMiscRegNoEffect(MISCREG_ISA); + MISA misa = readMiscRegNoEffect(MISCREG_ISA); auto val = readMiscRegNoEffect(idx); // if compressed instructions are disabled, epc[1] is set to 0 - if ((misa & ISA_EXT_C_MASK) == 0) + if (misa.rvc == 0) return mbits(val, 63, 2); // epc[0] is always 0 else @@ -617,15 +629,16 @@ ISA::setMiscReg(RegIndex idx, RegVal val) break; case MISCREG_ISA: { - auto cur_val = readMiscRegNoEffect(idx); + MISA cur_misa = (MISA)readMiscRegNoEffect(MISCREG_ISA); + MISA new_misa = (MISA)val; // only allow to disable compressed instructions // if the following instruction is 4-byte aligned - if ((val & ISA_EXT_C_MASK) == 0 && + if (new_misa.rvc == 0 && bits(tc->pcState().as().npc(), 2, 0) != 0) { - val |= cur_val & ISA_EXT_C_MASK; + new_misa.rvc = new_misa.rvc | cur_misa.rvc; } - setMiscRegNoEffect(idx, val); + setMiscRegNoEffect(idx, new_misa); } break; case MISCREG_STATUS: diff --git a/src/arch/riscv/regs/misc.hh b/src/arch/riscv/regs/misc.hh index 8cb4ca0f91..5ea3536141 100644 --- a/src/arch/riscv/regs/misc.hh +++ b/src/arch/riscv/regs/misc.hh @@ -752,6 +752,37 
@@ BitUnion64(STATUS) Bitfield<0> uie; EndBitUnion(STATUS) +/** + * These fields are specified in the RISC-V Instruction Set Manual, Volume II, + * v1.10, v1.11 and v1.12 in Figure 3.1, accessible at www.riscv.org. The register + * is used to control instruction extensions. + */ +BitUnion64(MISA) + Bitfield<63, 62> rv64_mxl; + Bitfield<31, 30> rv32_mxl; + Bitfield<23> rvx; + Bitfield<21> rvv; + Bitfield<20> rvu; + Bitfield<19> rvt; + Bitfield<18> rvs; + Bitfield<16> rvq; + Bitfield<15> rvp; + Bitfield<13> rvn; + Bitfield<12> rvm; + Bitfield<11> rvl; + Bitfield<10> rvk; + Bitfield<9> rvj; + Bitfield<8> rvi; + Bitfield<7> rvh; + Bitfield<6> rvg; + Bitfield<5> rvf; + Bitfield<4> rve; + Bitfield<3> rvd; + Bitfield<2> rvc; + Bitfield<1> rvb; + Bitfield<0> rva; +EndBitUnion(MISA) + /** * These fields are specified in the RISC-V Instruction Set Manual, Volume II, * v1.10 in Figures 3.11 and 3.12, accessible at www.riscv.org. Both the MIP From b305019ac4420063dee4aaf15162fa779b81dea6 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Thu, 9 Mar 2023 10:13:21 -0800 Subject: [PATCH 263/492] python: Replace 'getargspec' with 'signature' in SimObject.py In Python 3.11 'inspect.getargspec' has been removed. It has been marked for deprecation since 3.5. The SimObject.py class has therefore been rewritten to use 'inspect.signature' instead. 
Change-Id: I9efd831e05e0b1619f366ffe722abb0a072fd519 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68817 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/python/m5/SimObject.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py index 6caa532897..354a8288cd 100644 --- a/src/python/m5/SimObject.py +++ b/src/python/m5/SimObject.py @@ -478,19 +478,21 @@ def cxxMethod(*args, **kwargs): return_value_policy = kwargs.get("return_value_policy", None) static = kwargs.get("static", False) - args, varargs, keywords, defaults = inspect.getargspec(func) - if varargs or keywords: - raise ValueError( - "Wrapped methods must not contain variable arguments" - ) - - # Create tuples of (argument, default) - if defaults: - args = args[: -len(defaults)] + list( - zip(args[-len(defaults) :], defaults) - ) - # Don't include self in the argument list to PyBind - args = args[1:] + # Create a list of tuples of (argument, default). The `PyBindMethod` + # class expects the `args` argument to be a list of either argument + # names, in the case that argument does not have a default value, and + # a tuple of (argument, default) in the casae where an argument does. + args = [] + sig = inspect.signature(func) + for param_name in sig.parameters.keys(): + if param_name == "self": + # We don't cound 'self' as an argument in this case. + continue + param = sig.parameters[param_name] + if param.default is param.empty: + args.append(param_name) + else: + args.append((param_name, param.default)) @wraps(func) def cxx_call(self, *args, **kwargs): From 07fca546e6e85ed5a5c5d729f12cda2cc6428ce0 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Thu, 9 Mar 2023 10:23:23 -0800 Subject: [PATCH 264/492] ext: Update Pybind to Version 2.10.3 Updating Pybind11 is necessary for gem5 to compile correctly with Python 3.11. 
As of March 9th 2023, 2.10.3 is the latest version of Pybind11. Change-Id: I32c68c507770040d3fac2de442d88a8f46b48896 Issue-on: https://gem5.atlassian.net/browse/GEM5-1295 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68818 Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Jason Lowe-Power --- ext/pybind11/.appveyor.yml | 6 +- ext/pybind11/.clang-format | 21 +- ext/pybind11/.clang-tidy | 82 +- ext/pybind11/.codespell-ignore-lines | 24 + ext/pybind11/.gitattributes | 1 + ext/pybind11/.github/CODEOWNERS | 9 + ext/pybind11/.github/CONTRIBUTING.md | 60 +- .../.github/ISSUE_TEMPLATE/bug-report.md | 28 - .../.github/ISSUE_TEMPLATE/bug-report.yml | 61 + .../.github/ISSUE_TEMPLATE/config.yml | 3 + .../.github/ISSUE_TEMPLATE/feature-request.md | 16 - .../.github/ISSUE_TEMPLATE/question.md | 21 - ext/pybind11/.github/dependabot.yml | 9 - ext/pybind11/.github/matchers/pylint.json | 32 + ext/pybind11/.github/pull_request_template.md | 4 + ext/pybind11/.github/workflows/ci.yml | 638 ++++-- ext/pybind11/.github/workflows/configure.yml | 24 +- ext/pybind11/.github/workflows/format.yml | 25 +- ext/pybind11/.github/workflows/labeler.yml | 6 +- ext/pybind11/.github/workflows/pip.yml | 39 +- ext/pybind11/.github/workflows/upstream.yml | 114 + ext/pybind11/.gitignore | 3 + ext/pybind11/.pre-commit-config.yaml | 137 +- ext/pybind11/CMakeLists.txt | 35 +- ext/pybind11/MANIFEST.in | 1 - ext/pybind11/README.rst | 18 +- ext/pybind11/docs/Doxyfile | 4 +- ext/pybind11/docs/_static/css/custom.css | 3 + ext/pybind11/docs/_static/theme_overrides.css | 11 - ext/pybind11/docs/advanced/cast/custom.rst | 6 +- ext/pybind11/docs/advanced/cast/eigen.rst | 2 +- ext/pybind11/docs/advanced/cast/overview.rst | 183 +- ext/pybind11/docs/advanced/cast/stl.rst | 8 +- ext/pybind11/docs/advanced/cast/strings.rst | 19 +- ext/pybind11/docs/advanced/classes.rst | 98 +- ext/pybind11/docs/advanced/exceptions.rst | 17 +- ext/pybind11/docs/advanced/functions.rst | 60 +- 
ext/pybind11/docs/advanced/misc.rst | 77 +- ext/pybind11/docs/advanced/pycpp/numpy.rst | 16 +- ext/pybind11/docs/advanced/smart_ptrs.rst | 2 +- ext/pybind11/docs/basics.rst | 11 +- ext/pybind11/docs/benchmark.py | 28 +- ext/pybind11/docs/changelog.rst | 610 +++++- ext/pybind11/docs/classes.rst | 19 +- ext/pybind11/docs/compiling.rst | 30 +- ext/pybind11/docs/conf.py | 28 +- ext/pybind11/docs/faq.rst | 42 +- ext/pybind11/docs/pybind11-logo.png | Bin 58510 -> 61034 bytes ext/pybind11/docs/release.rst | 9 +- ext/pybind11/docs/requirements.txt | 14 +- ext/pybind11/docs/upgrade.rst | 6 +- ext/pybind11/include/pybind11/attr.h | 285 ++- ext/pybind11/include/pybind11/buffer_info.h | 121 +- ext/pybind11/include/pybind11/cast.h | 1081 ++++++---- ext/pybind11/include/pybind11/chrono.h | 148 +- ext/pybind11/include/pybind11/complex.h | 31 +- ext/pybind11/include/pybind11/detail/class.h | 312 +-- ext/pybind11/include/pybind11/detail/common.h | 992 +++++---- ext/pybind11/include/pybind11/detail/descr.h | 100 +- ext/pybind11/include/pybind11/detail/init.h | 282 ++- .../include/pybind11/detail/internals.h | 302 ++- .../pybind11/detail/type_caster_base.h | 590 +++--- ext/pybind11/include/pybind11/detail/typeid.h | 28 +- ext/pybind11/include/pybind11/eigen.h | 596 +----- ext/pybind11/include/pybind11/eigen/matrix.h | 699 ++++++ ext/pybind11/include/pybind11/eigen/tensor.h | 509 +++++ ext/pybind11/include/pybind11/embed.h | 155 +- ext/pybind11/include/pybind11/eval.h | 103 +- ext/pybind11/include/pybind11/functional.h | 46 +- ext/pybind11/include/pybind11/gil.h | 118 +- ext/pybind11/include/pybind11/iostream.h | 68 +- ext/pybind11/include/pybind11/numpy.h | 1309 +++++++----- ext/pybind11/include/pybind11/operators.h | 231 +- ext/pybind11/include/pybind11/options.h | 57 +- ext/pybind11/include/pybind11/pybind11.h | 1878 ++++++++++------- ext/pybind11/include/pybind11/pytypes.h | 1598 +++++++++----- ext/pybind11/include/pybind11/stl.h | 243 ++- .../include/pybind11/stl/filesystem.h | 
75 +- ext/pybind11/include/pybind11/stl_bind.h | 676 +++--- ext/pybind11/noxfile.py | 40 +- ext/pybind11/pybind11/__init__.py | 10 +- ext/pybind11/pybind11/__main__.py | 18 +- ext/pybind11/pybind11/_version.py | 6 +- ext/pybind11/pybind11/_version.pyi | 6 - ext/pybind11/pybind11/commands.py | 32 +- ext/pybind11/pybind11/setup_helpers.py | 232 +- ext/pybind11/pybind11/setup_helpers.pyi | 63 - ext/pybind11/pyproject.toml | 54 +- ext/pybind11/setup.cfg | 34 +- ext/pybind11/setup.py | 141 +- ext/pybind11/tests/CMakeLists.txt | 232 +- ext/pybind11/tests/conftest.py | 50 +- ext/pybind11/tests/constructor_stats.h | 143 +- ext/pybind11/tests/cross_module_gil_utils.cpp | 111 +- ...s_module_interleaved_error_already_set.cpp | 51 + .../tests/eigen_tensor_avoid_stl_array.cpp | 14 + ext/pybind11/tests/env.py | 5 - .../tests/extra_python_package/test_files.py | 161 +- .../extra_setuptools/test_setuphelper.py | 28 +- ext/pybind11/tests/local_bindings.h | 41 +- ext/pybind11/tests/object.h | 100 +- .../tests/pybind11_cross_module_tests.cpp | 86 +- ext/pybind11/tests/pybind11_tests.cpp | 60 +- ext/pybind11/tests/pybind11_tests.h | 34 +- ext/pybind11/tests/pytest.ini | 9 +- ext/pybind11/tests/requirements.txt | 17 +- ext/pybind11/tests/test_async.cpp | 5 +- ext/pybind11/tests/test_async.py | 1 - ext/pybind11/tests/test_buffers.cpp | 82 +- ext/pybind11/tests/test_buffers.py | 16 +- ext/pybind11/tests/test_builtin_casters.cpp | 328 ++- ext/pybind11/tests/test_builtin_casters.py | 220 +- ext/pybind11/tests/test_call_policies.cpp | 44 +- ext/pybind11/tests/test_call_policies.py | 1 - ext/pybind11/tests/test_callbacks.cpp | 117 +- ext/pybind11/tests/test_callbacks.py | 26 +- ext/pybind11/tests/test_chrono.cpp | 33 +- ext/pybind11/tests/test_chrono.py | 3 +- ext/pybind11/tests/test_class.cpp | 369 ++-- ext/pybind11/tests/test_class.py | 30 +- ext/pybind11/tests/test_cmake_build/embed.cpp | 8 +- ext/pybind11/tests/test_cmake_build/test.py | 6 +- ext/pybind11/tests/test_const_name.cpp | 55 + 
ext/pybind11/tests/test_const_name.py | 29 + .../tests/test_constants_and_functions.cpp | 98 +- .../tests/test_constants_and_functions.py | 1 - ext/pybind11/tests/test_copy_move.cpp | 191 +- ext/pybind11/tests/test_copy_move.py | 8 +- .../tests/test_custom_type_casters.cpp | 146 +- .../tests/test_custom_type_casters.py | 11 +- ext/pybind11/tests/test_custom_type_setup.py | 2 - ext/pybind11/tests/test_docstring_options.cpp | 102 +- ext/pybind11/tests/test_docstring_options.py | 24 +- .../{test_eigen.cpp => test_eigen_matrix.cpp} | 246 ++- .../{test_eigen.py => test_eigen_matrix.py} | 107 +- ext/pybind11/tests/test_eigen_tensor.cpp | 18 + ext/pybind11/tests/test_eigen_tensor.inl | 333 +++ ext/pybind11/tests/test_eigen_tensor.py | 296 +++ ext/pybind11/tests/test_embed/CMakeLists.txt | 4 +- ext/pybind11/tests/test_embed/catch.cpp | 29 +- .../tests/test_embed/external_module.cpp | 9 +- .../tests/test_embed/test_interpreter.cpp | 216 +- .../tests/test_embed/test_interpreter.py | 3 +- .../tests/test_embed/test_trampoline.py | 16 + ext/pybind11/tests/test_enum.cpp | 81 +- ext/pybind11/tests/test_enum.py | 12 +- ext/pybind11/tests/test_eval.cpp | 31 +- ext/pybind11/tests/test_eval.py | 3 +- ext/pybind11/tests/test_eval_call.py | 1 - ext/pybind11/tests/test_exceptions.cpp | 185 +- ext/pybind11/tests/test_exceptions.h | 1 + ext/pybind11/tests/test_exceptions.py | 150 +- .../tests/test_factory_constructors.cpp | 109 +- .../tests/test_factory_constructors.py | 10 +- ext/pybind11/tests/test_gil_scoped.cpp | 139 +- ext/pybind11/tests/test_gil_scoped.py | 228 +- ext/pybind11/tests/test_iostream.cpp | 33 +- ext/pybind11/tests/test_iostream.py | 40 +- .../tests/test_kwargs_and_defaults.cpp | 240 ++- .../tests/test_kwargs_and_defaults.py | 122 +- ext/pybind11/tests/test_local_bindings.cpp | 41 +- ext/pybind11/tests/test_local_bindings.py | 3 +- .../tests/test_methods_and_attributes.cpp | 199 +- .../tests/test_methods_and_attributes.py | 76 +- ext/pybind11/tests/test_modules.cpp | 67 
+- ext/pybind11/tests/test_modules.py | 31 +- .../tests/test_multiple_inheritance.cpp | 220 +- .../tests/test_multiple_inheritance.py | 147 +- ext/pybind11/tests/test_numpy_array.cpp | 308 +-- ext/pybind11/tests/test_numpy_array.py | 20 +- ext/pybind11/tests/test_numpy_dtypes.cpp | 368 ++-- ext/pybind11/tests/test_numpy_dtypes.py | 55 +- ext/pybind11/tests/test_numpy_vectorize.cpp | 32 +- ext/pybind11/tests/test_numpy_vectorize.py | 1 - ext/pybind11/tests/test_opaque_types.cpp | 18 +- ext/pybind11/tests/test_opaque_types.py | 5 +- .../tests/test_operator_overloading.cpp | 184 +- .../tests/test_operator_overloading.py | 12 +- ext/pybind11/tests/test_pickling.cpp | 45 +- ext/pybind11/tests/test_pickling.py | 21 +- ext/pybind11/tests/test_pytypes.cpp | 468 +++- ext/pybind11/tests/test_pytypes.py | 448 +++- .../tests/test_sequences_and_iterators.cpp | 289 ++- .../tests/test_sequences_and_iterators.py | 30 +- ext/pybind11/tests/test_smart_ptr.cpp | 104 +- ext/pybind11/tests/test_smart_ptr.py | 13 +- ext/pybind11/tests/test_stl.cpp | 278 +-- ext/pybind11/tests/test_stl.py | 25 +- ext/pybind11/tests/test_stl_binders.cpp | 93 +- ext/pybind11/tests/test_stl_binders.py | 43 +- .../tests/test_tagbased_polymorphic.cpp | 97 +- .../tests/test_tagbased_polymorphic.py | 1 - ext/pybind11/tests/test_thread.cpp | 10 +- ext/pybind11/tests/test_thread.py | 6 +- ext/pybind11/tests/test_union.py | 1 - ext/pybind11/tests/test_virtual_functions.cpp | 334 +-- ext/pybind11/tests/test_virtual_functions.py | 44 +- ext/pybind11/tools/FindCatch.cmake | 2 + ext/pybind11/tools/FindPythonLibsNew.cmake | 66 +- ext/pybind11/tools/JoinPaths.cmake | 23 + .../codespell_ignore_lines_from_errors.py | 35 + ext/pybind11/tools/libsize.py | 7 +- ext/pybind11/tools/make_changelog.py | 10 +- ext/pybind11/tools/pybind11.pc.in | 7 + ext/pybind11/tools/pybind11Common.cmake | 50 +- ext/pybind11/tools/pybind11Config.cmake.in | 10 +- ext/pybind11/tools/pybind11NewTools.cmake | 40 +- 
ext/pybind11/tools/pybind11Tools.cmake | 48 +- ext/pybind11/tools/setup_global.py.in | 14 +- ext/pybind11/tools/setup_main.py.in | 9 +- 210 files changed, 17229 insertions(+), 8978 deletions(-) create mode 100644 ext/pybind11/.codespell-ignore-lines create mode 100644 ext/pybind11/.gitattributes create mode 100644 ext/pybind11/.github/CODEOWNERS delete mode 100644 ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.md create mode 100644 ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.yml delete mode 100644 ext/pybind11/.github/ISSUE_TEMPLATE/feature-request.md delete mode 100644 ext/pybind11/.github/ISSUE_TEMPLATE/question.md create mode 100644 ext/pybind11/.github/matchers/pylint.json create mode 100644 ext/pybind11/.github/workflows/upstream.yml create mode 100644 ext/pybind11/docs/_static/css/custom.css delete mode 100644 ext/pybind11/docs/_static/theme_overrides.css create mode 100644 ext/pybind11/include/pybind11/eigen/matrix.h create mode 100644 ext/pybind11/include/pybind11/eigen/tensor.h delete mode 100644 ext/pybind11/pybind11/_version.pyi delete mode 100644 ext/pybind11/pybind11/setup_helpers.pyi create mode 100644 ext/pybind11/tests/cross_module_interleaved_error_already_set.cpp create mode 100644 ext/pybind11/tests/eigen_tensor_avoid_stl_array.cpp create mode 100644 ext/pybind11/tests/test_const_name.cpp create mode 100644 ext/pybind11/tests/test_const_name.py rename ext/pybind11/tests/{test_eigen.cpp => test_eigen_matrix.cpp} (66%) rename ext/pybind11/tests/{test_eigen.py => test_eigen_matrix.py} (90%) create mode 100644 ext/pybind11/tests/test_eigen_tensor.cpp create mode 100644 ext/pybind11/tests/test_eigen_tensor.inl create mode 100644 ext/pybind11/tests/test_eigen_tensor.py create mode 100644 ext/pybind11/tests/test_embed/test_trampoline.py create mode 100644 ext/pybind11/tools/JoinPaths.cmake create mode 100644 ext/pybind11/tools/codespell_ignore_lines_from_errors.py create mode 100644 ext/pybind11/tools/pybind11.pc.in diff --git 
a/ext/pybind11/.appveyor.yml b/ext/pybind11/.appveyor.yml index 85445d41a2..360760ac8d 100644 --- a/ext/pybind11/.appveyor.yml +++ b/ext/pybind11/.appveyor.yml @@ -1,6 +1,6 @@ version: 1.0.{build} image: -- Visual Studio 2015 +- Visual Studio 2017 test: off skip_branch_with_pr: true build: @@ -11,11 +11,9 @@ environment: matrix: - PYTHON: 36 CONFIG: Debug - - PYTHON: 27 - CONFIG: Debug install: - ps: | - $env:CMAKE_GENERATOR = "Visual Studio 14 2015" + $env:CMAKE_GENERATOR = "Visual Studio 15 2017" if ($env:PLATFORM -eq "x64") { $env:PYTHON = "$env:PYTHON-x64" } $env:PATH = "C:\Python$env:PYTHON\;C:\Python$env:PYTHON\Scripts\;$env:PATH" python -W ignore -m pip install --upgrade pip wheel diff --git a/ext/pybind11/.clang-format b/ext/pybind11/.clang-format index 8700fca84d..b477a16037 100644 --- a/ext/pybind11/.clang-format +++ b/ext/pybind11/.clang-format @@ -3,19 +3,36 @@ # clang-format --style=llvm --dump-config BasedOnStyle: LLVM AccessModifierOffset: -4 -AlignConsecutiveAssignments: true +AllowShortLambdasOnASingleLine: true AlwaysBreakTemplateDeclarations: Yes BinPackArguments: false BinPackParameters: false BreakBeforeBinaryOperators: All BreakConstructorInitializers: BeforeColon ColumnLimit: 99 +CommentPragmas: 'NOLINT:.*|^ IWYU pragma:' +IncludeBlocks: Regroup IndentCaseLabels: true IndentPPDirectives: AfterHash IndentWidth: 4 Language: Cpp SpaceAfterCStyleCast: true -# SpaceInEmptyBlock: true # too new Standard: Cpp11 +StatementMacros: ['PyObject_HEAD'] TabWidth: 4 +IncludeCategories: + - Regex: '' + Priority: 4 + - Regex: '.*' + Priority: 5 ... 
diff --git a/ext/pybind11/.clang-tidy b/ext/pybind11/.clang-tidy index e29d929897..23018386c1 100644 --- a/ext/pybind11/.clang-tidy +++ b/ext/pybind11/.clang-tidy @@ -1,13 +1,77 @@ FormatStyle: file -Checks: ' -llvm-namespace-comment, -modernize-use-override, -readability-container-size-empty, -modernize-use-using, -modernize-use-equals-default, -modernize-use-auto, -modernize-use-emplace, -' +Checks: | + *bugprone*, + *performance*, + clang-analyzer-optin.cplusplus.VirtualCall, + clang-analyzer-optin.performance.Padding, + cppcoreguidelines-init-variables, + cppcoreguidelines-prefer-member-initializer, + cppcoreguidelines-pro-type-static-cast-downcast, + cppcoreguidelines-slicing, + google-explicit-constructor, + llvm-namespace-comment, + misc-definitions-in-headers, + misc-misplaced-const, + misc-non-copyable-objects, + misc-static-assert, + misc-throw-by-value-catch-by-reference, + misc-uniqueptr-reset-release, + misc-unused-parameters, + modernize-avoid-bind, + modernize-loop-convert, + modernize-make-shared, + modernize-redundant-void-arg, + modernize-replace-auto-ptr, + modernize-replace-disallow-copy-and-assign-macro, + modernize-replace-random-shuffle, + modernize-shrink-to-fit, + modernize-use-auto, + modernize-use-bool-literals, + modernize-use-default-member-init, + modernize-use-emplace, + modernize-use-equals-default, + modernize-use-equals-delete, + modernize-use-noexcept, + modernize-use-nullptr, + modernize-use-override, + modernize-use-using, + readability-avoid-const-params-in-decls, + readability-braces-around-statements, + readability-const-return-type, + readability-container-size-empty, + readability-delete-null-pointer, + readability-else-after-return, + readability-implicit-bool-conversion, + readability-inconsistent-declaration-parameter-name, + readability-make-member-function-const, + readability-misplaced-array-index, + readability-non-const-parameter, + readability-qualified-auto, + readability-redundant-function-ptr-dereference, + 
readability-redundant-smartptr-get, + readability-redundant-string-cstr, + readability-simplify-subscript-expr, + readability-static-accessed-through-instance, + readability-static-definition-in-anonymous-namespace, + readability-string-compare, + readability-suspicious-call-argument, + readability-uniqueptr-delete-release, + -bugprone-easily-swappable-parameters, + -bugprone-exception-escape, + -bugprone-reserved-identifier, + -bugprone-unused-raii, + +CheckOptions: +- key: modernize-use-equals-default.IgnoreMacros + value: false +- key: performance-for-range-copy.WarnOnAllAutoCopies + value: true +- key: performance-inefficient-string-concatenation.StrictMode + value: true +- key: performance-unnecessary-value-param.AllowedTypes + value: 'exception_ptr$;' +- key: readability-implicit-bool-conversion.AllowPointerConditions + value: true HeaderFilterRegex: 'pybind11/.*h' diff --git a/ext/pybind11/.codespell-ignore-lines b/ext/pybind11/.codespell-ignore-lines new file mode 100644 index 0000000000..2a01d63ebb --- /dev/null +++ b/ext/pybind11/.codespell-ignore-lines @@ -0,0 +1,24 @@ +template + template + auto &this_ = static_cast(*this); + if (load_impl(temp, false)) { + ssize_t nd = 0; + auto trivial = broadcast(buffers, nd, shape); + auto ndim = (size_t) nd; + int nd; + ssize_t ndim() const { return detail::array_proxy(m_ptr)->nd; } + using op = op_impl; +template + template + class_ &def(const detail::op_ &op, const Extra &...extra) { + class_ &def_cast(const detail::op_ &op, const Extra &...extra) { +@pytest.mark.parametrize("access", ["ro", "rw", "static_ro", "static_rw"]) +struct IntStruct { + explicit IntStruct(int v) : value(v){}; + ~IntStruct() { value = -value; } + IntStruct(const IntStruct &) = default; + IntStruct &operator=(const IntStruct &) = default; + py::class_(m, "IntStruct").def(py::init([](const int i) { return IntStruct(i); })); + py::implicitly_convertible(); + m.def("test", [](int expected, const IntStruct &in) { + [](int expected, const 
IntStruct &in) { diff --git a/ext/pybind11/.gitattributes b/ext/pybind11/.gitattributes new file mode 100644 index 0000000000..d611e1496d --- /dev/null +++ b/ext/pybind11/.gitattributes @@ -0,0 +1 @@ +docs/*.svg binary diff --git a/ext/pybind11/.github/CODEOWNERS b/ext/pybind11/.github/CODEOWNERS new file mode 100644 index 0000000000..4e2c66902e --- /dev/null +++ b/ext/pybind11/.github/CODEOWNERS @@ -0,0 +1,9 @@ +*.cmake @henryiii +CMakeLists.txt @henryiii +*.yml @henryiii +*.yaml @henryiii +/tools/ @henryiii +/pybind11/ @henryiii +noxfile.py @henryiii +.clang-format @henryiii +.clang-tidy @henryiii diff --git a/ext/pybind11/.github/CONTRIBUTING.md b/ext/pybind11/.github/CONTRIBUTING.md index 08d9e7cb93..00b1fea4cf 100644 --- a/ext/pybind11/.github/CONTRIBUTING.md +++ b/ext/pybind11/.github/CONTRIBUTING.md @@ -53,6 +53,33 @@ derivative works thereof, in binary and source code form. ## Development of pybind11 +### Quick setup + +To setup a quick development environment, use [`nox`](https://nox.thea.codes). +This will allow you to do some common tasks with minimal setup effort, but will +take more time to run and be less flexible than a full development environment. +If you use [`pipx run nox`](https://pipx.pypa.io), you don't even need to +install `nox`. Examples: + +```bash +# List all available sessions +nox -l + +# Run linters +nox -s lint + +# Run tests on Python 3.9 +nox -s tests-3.9 + +# Build and preview docs +nox -s docs -- serve + +# Build SDists and wheels +nox -s build +``` + +### Full setup + To setup an ideal development environment, run the following commands on a system with CMake 3.14+: @@ -66,11 +93,10 @@ cmake --build build -j4 Tips: -* You can use `virtualenv` (from PyPI) instead of `venv` (which is Python 3 - only). +* You can use `virtualenv` (faster, from PyPI) instead of `venv`. * You can select any name for your environment folder; if it contains "env" it will be ignored by git. 
-* If you don’t have CMake 3.14+, just add “cmake” to the pip install command. +* If you don't have CMake 3.14+, just add "cmake" to the pip install command. * You can use `-DPYBIND11_FINDPYTHON=ON` to use FindPython on CMake 3.12+ * In classic mode, you may need to set `-DPYTHON_EXECUTABLE=/path/to/python`. FindPython uses `-DPython_ROOT_DIR=/path/to` or @@ -78,7 +104,7 @@ Tips: ### Configuration options -In CMake, configuration options are given with “-D”. Options are stored in the +In CMake, configuration options are given with "-D". Options are stored in the build directory, in the `CMakeCache.txt` file, so they are remembered for each build directory. Two selections are special - the generator, given with `-G`, and the compiler, which is selected based on environment variables `CXX` and @@ -88,12 +114,12 @@ after the initial run. The valid options are: * `-DCMAKE_BUILD_TYPE`: Release, Debug, MinSizeRel, RelWithDebInfo -* `-DPYBIND11_FINDPYTHON=ON`: Use CMake 3.12+’s FindPython instead of the +* `-DPYBIND11_FINDPYTHON=ON`: Use CMake 3.12+'s FindPython instead of the classic, deprecated, custom FindPythonLibs * `-DPYBIND11_NOPYTHON=ON`: Disable all Python searching (disables tests) * `-DBUILD_TESTING=ON`: Enable the tests * `-DDOWNLOAD_CATCH=ON`: Download catch to build the C++ tests -* `-DOWNLOAD_EIGEN=ON`: Download Eigen for the NumPy tests +* `-DDOWNLOAD_EIGEN=ON`: Download Eigen for the NumPy tests * `-DPYBIND11_INSTALL=ON/OFF`: Enable the install target (on by default for the master project) * `-DUSE_PYTHON_INSTALL_DIR=ON`: Try to install into the python dir @@ -132,8 +158,9 @@ tests with these targets: * `test_cmake_build`: Install / subdirectory tests If you want to build just a subset of tests, use -`-DPYBIND11_TEST_OVERRIDE="test_callbacks.cpp;test_pickling.cpp"`. If this is -empty, all tests will be built. +`-DPYBIND11_TEST_OVERRIDE="test_callbacks;test_pickling"`. If this is +empty, all tests will be built. 
Tests are specified without an extension if they need both a .py and +.cpp file. You may also pass flags to the `pytest` target by editing `tests/pytest.ini` or by using the `PYTEST_ADDOPTS` environment variable @@ -203,16 +230,19 @@ of the pybind11 repo. [`clang-tidy`][clang-tidy] performs deeper static code analyses and is more complex to run, compared to `clang-format`, but support for `clang-tidy` is built into the pybind11 CMake configuration. To run `clang-tidy`, the -following recipe should work. Files will be modified in place, so you can -use git to monitor the changes. +following recipe should work. Run the `docker` command from the top-level +directory inside your pybind11 git clone. Files will be modified in place, +so you can use git to monitor the changes. ```bash -docker run --rm -v $PWD:/pybind11 -it silkeh/clang:10 -apt-get update && apt-get install python3-dev python3-pytest -cmake -S pybind11/ -B build -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);-fix" -cmake --build build +docker run --rm -v $PWD:/mounted_pybind11 -it silkeh/clang:13 +apt-get update && apt-get install -y python3-dev python3-pytest +cmake -S /mounted_pybind11/ -B build -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);--use-color" -DDOWNLOAD_EIGEN=ON -DDOWNLOAD_CATCH=ON -DCMAKE_CXX_STANDARD=17 +cmake --build build -j 2 ``` +You can add `--fix` to the options list if you want. + ### Include what you use To run include what you use, install (`brew install include-what-you-use` on @@ -228,7 +258,7 @@ The report is sent to stderr; you can pipe it into a file if you wish. 
### Build recipes This builds with the Intel compiler (assuming it is in your path, along with a -recent CMake and Python 3): +recent CMake and Python): ```bash python3 -m venv venv diff --git a/ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.md b/ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.md deleted file mode 100644 index ae36ea6508..0000000000 --- a/ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -name: Bug Report -about: File an issue about a bug -title: "[BUG] " ---- - - -Make sure you've completed the following steps before submitting your issue -- thank you! - -1. Make sure you've read the [documentation][]. Your issue may be addressed there. -2. Search the [issue tracker][] to verify that this hasn't already been reported. +1 or comment there if it has. -3. Consider asking first in the [Gitter chat room][]. -4. Include a self-contained and minimal piece of code that reproduces the problem. If that's not possible, try to make the description as clear as possible. - a. If possible, make a PR with a new, failing test to give us a starting point to work on! - -[documentation]: https://pybind11.readthedocs.io -[issue tracker]: https://github.com/pybind/pybind11/issues -[Gitter chat room]: https://gitter.im/pybind/Lobby - -*After reading, remove this checklist and the template text in parentheses below.* - -## Issue description - -(Provide a short description, state the expected behavior and what actually happens.) - -## Reproducible example code - -(The code should be minimal, have no external dependencies, isolate the function(s) that cause breakage. Submit matched and complete C++ and Python snippets that can be easily compiled and run to diagnose the issue.) 
diff --git a/ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.yml b/ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.yml new file mode 100644 index 0000000000..4f1e78f33c --- /dev/null +++ b/ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.yml @@ -0,0 +1,61 @@ +name: Bug Report +description: File an issue about a bug +title: "[BUG]: " +labels: [triage] +body: + - type: markdown + attributes: + value: | + Please do your best to make the issue as easy to act on as possible, and only submit here if there is clearly a problem with pybind11 (ask first if unsure). **Note that a reproducer in a PR is much more likely to get immediate attention.** + + - type: checkboxes + id: steps + attributes: + label: Required prerequisites + description: Make sure you've completed the following steps before submitting your issue -- thank you! + options: + - label: Make sure you've read the [documentation](https://pybind11.readthedocs.io). Your issue may be addressed there. + required: true + - label: Search the [issue tracker](https://github.com/pybind/pybind11/issues) and [Discussions](https://github.com/pybind/pybind11/discussions) to verify that this hasn't already been reported. +1 or comment there if it has. + required: true + - label: Consider asking first in the [Gitter chat room](https://gitter.im/pybind/Lobby) or in a [Discussion](https://github.com/pybind/pybind11/discussions/new). + required: false + + - type: input + id: version + attributes: + label: What version (or hash if on master) of pybind11 are you using? + validations: + required: true + + - type: textarea + id: description + attributes: + label: Problem description + placeholder: >- + Provide a short description, state the expected behavior and what + actually happens. Include relevant information like what version of + pybind11 you are using, what system you are on, and any useful commands + / output. 
+ validations: + required: true + + - type: textarea + id: code + attributes: + label: Reproducible example code + placeholder: >- + The code should be minimal, have no external dependencies, isolate the + function(s) that cause breakage. Submit matched and complete C++ and + Python snippets that can be easily compiled and run to diagnose the + issue. — Note that a reproducer in a PR is much more likely to get + immediate attention: failing tests in the pybind11 CI are the best + starting point for working out fixes. + render: text + + - type: input + id: regression + attributes: + label: Is this a regression? Put the last known working version here if it is. + description: Put the last known working version here if this is a regression. + value: Not a regression diff --git a/ext/pybind11/.github/ISSUE_TEMPLATE/config.yml b/ext/pybind11/.github/ISSUE_TEMPLATE/config.yml index 20e743136f..27f9a80441 100644 --- a/ext/pybind11/.github/ISSUE_TEMPLATE/config.yml +++ b/ext/pybind11/.github/ISSUE_TEMPLATE/config.yml @@ -1,5 +1,8 @@ blank_issues_enabled: false contact_links: + - name: Ask a question + url: https://github.com/pybind/pybind11/discussions/new + about: Please ask and answer questions here, or propose new ideas. - name: Gitter room url: https://gitter.im/pybind/Lobby about: A room for discussing pybind11 with an active community diff --git a/ext/pybind11/.github/ISSUE_TEMPLATE/feature-request.md b/ext/pybind11/.github/ISSUE_TEMPLATE/feature-request.md deleted file mode 100644 index 5f6ec81ec9..0000000000 --- a/ext/pybind11/.github/ISSUE_TEMPLATE/feature-request.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -name: Feature Request -about: File an issue about adding a feature -title: "[FEAT] " ---- - - -Make sure you've completed the following steps before submitting your issue -- thank you! - -1. Check if your feature has already been mentioned / rejected / planned in other issues. -2. 
If those resources didn't help, consider asking in the [Gitter chat room][] to see if this is interesting / useful to a larger audience and possible to implement reasonably, -4. If you have a useful feature that passes the previous items (or not suitable for chat), please fill in the details below. - -[Gitter chat room]: https://gitter.im/pybind/Lobby - -*After reading, remove this checklist.* diff --git a/ext/pybind11/.github/ISSUE_TEMPLATE/question.md b/ext/pybind11/.github/ISSUE_TEMPLATE/question.md deleted file mode 100644 index b199b6ee8a..0000000000 --- a/ext/pybind11/.github/ISSUE_TEMPLATE/question.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -name: Question -about: File an issue about unexplained behavior -title: "[QUESTION] " ---- - -If you have a question, please check the following first: - -1. Check if your question has already been answered in the [FAQ][] section. -2. Make sure you've read the [documentation][]. Your issue may be addressed there. -3. If those resources didn't help and you only have a short question (not a bug report), consider asking in the [Gitter chat room][] -4. Search the [issue tracker][], including the closed issues, to see if your question has already been asked/answered. +1 or comment if it has been asked but has no answer. -5. If you have a more complex question which is not answered in the previous items (or not suitable for chat), please fill in the details below. -6. Include a self-contained and minimal piece of code that illustrates your question. If that's not possible, try to make the description as clear as possible. 
- -[FAQ]: http://pybind11.readthedocs.io/en/latest/faq.html -[documentation]: https://pybind11.readthedocs.io -[issue tracker]: https://github.com/pybind/pybind11/issues -[Gitter chat room]: https://gitter.im/pybind/Lobby - -*After reading, remove this checklist.* diff --git a/ext/pybind11/.github/dependabot.yml b/ext/pybind11/.github/dependabot.yml index 73273365c0..2c7d170839 100644 --- a/ext/pybind11/.github/dependabot.yml +++ b/ext/pybind11/.github/dependabot.yml @@ -5,12 +5,3 @@ updates: directory: "/" schedule: interval: "daily" - ignore: - # Official actions have moving tags like v1 - # that are used, so they don't need updates here - - dependency-name: "actions/checkout" - - dependency-name: "actions/setup-python" - - dependency-name: "actions/cache" - - dependency-name: "actions/upload-artifact" - - dependency-name: "actions/download-artifact" - - dependency-name: "actions/labeler" diff --git a/ext/pybind11/.github/matchers/pylint.json b/ext/pybind11/.github/matchers/pylint.json new file mode 100644 index 0000000000..e3a6bd16b0 --- /dev/null +++ b/ext/pybind11/.github/matchers/pylint.json @@ -0,0 +1,32 @@ +{ + "problemMatcher": [ + { + "severity": "warning", + "pattern": [ + { + "regexp": "^([^:]+):(\\d+):(\\d+): ([A-DF-Z]\\d+): \\033\\[[\\d;]+m([^\\033]+).*$", + "file": 1, + "line": 2, + "column": 3, + "code": 4, + "message": 5 + } + ], + "owner": "pylint-warning" + }, + { + "severity": "error", + "pattern": [ + { + "regexp": "^([^:]+):(\\d+):(\\d+): (E\\d+): \\033\\[[\\d;]+m([^\\033]+).*$", + "file": 1, + "line": 2, + "column": 3, + "code": 4, + "message": 5 + } + ], + "owner": "pylint-error" + } + ] +} diff --git a/ext/pybind11/.github/pull_request_template.md b/ext/pybind11/.github/pull_request_template.md index 97a6ff7dda..54b7f5100d 100644 --- a/ext/pybind11/.github/pull_request_template.md +++ b/ext/pybind11/.github/pull_request_template.md @@ -1,3 +1,7 @@ + ## Description diff --git a/ext/pybind11/.github/workflows/ci.yml 
b/ext/pybind11/.github/workflows/ci.yml index f90c199526..b36bbfe1b9 100644 --- a/ext/pybind11/.github/workflows/ci.yml +++ b/ext/pybind11/.github/workflows/ci.yml @@ -9,6 +9,17 @@ on: - stable - v* +concurrency: + group: test-${{ github.ref }} + cancel-in-progress: true + +env: + PIP_ONLY_BINARY: numpy + FORCE_COLOR: 3 + PYTEST_TIMEOUT: 300 + # For cmake: + VERBOSE: 1 + jobs: # This is the "main" test suite, which tests a large number of different # versions of default compilers and Python versions in GitHub Actions. @@ -16,66 +27,66 @@ jobs: strategy: fail-fast: false matrix: - runs-on: [ubuntu-latest, windows-latest, macos-latest] + runs-on: [ubuntu-20.04, windows-2022, macos-latest] python: - - 2.7 - - 3.5 - - 3.6 - - 3.9 - # - 3.10-dev # Re-enable once 3.10.0a5 is released - - pypy2 - - pypy3 + - '3.6' + - '3.9' + - '3.10' + - '3.11' + - 'pypy-3.7' + - 'pypy-3.8' + - 'pypy-3.9' # Items in here will either be added to the build matrix (if not # present), or add new keys to an existing matrix element if all the # existing keys match. # - # We support three optional keys: args (both build), args1 (first - # build), and args2 (second build). + # We support an optional key: args, for cmake args include: # Just add a key - - runs-on: ubuntu-latest - python: 3.6 + - runs-on: ubuntu-20.04 + python: '3.6' args: > -DPYBIND11_FINDPYTHON=ON - - runs-on: windows-latest - python: 3.6 + -DCMAKE_CXX_FLAGS="-D_=1" + - runs-on: ubuntu-20.04 + python: 'pypy-3.8' args: > -DPYBIND11_FINDPYTHON=ON - - # These items will be removed from the build matrix, keys must match. - exclude: - # Currently 32bit only, and we build 64bit - - runs-on: windows-latest - python: pypy2 - - runs-on: windows-latest - python: pypy3 - - # TODO: PyPy2 7.3.3 segfaults, while 7.3.2 was fine. 
- - runs-on: ubuntu-latest - python: pypy2 + - runs-on: windows-2019 + python: '3.6' + args: > + -DPYBIND11_FINDPYTHON=ON + # Inject a couple Windows 2019 runs + - runs-on: windows-2019 + python: '3.9' name: "🐍 ${{ matrix.python }} • ${{ matrix.runs-on }} • x64 ${{ matrix.args }}" runs-on: ${{ matrix.runs-on }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup Python ${{ matrix.python }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} - - name: Setup Boost (Windows / Linux latest) - shell: bash - run: echo "BOOST_ROOT=$BOOST_ROOT_1_72_0" >> $GITHUB_ENV + - name: Setup Boost (Linux) + # Can't use boost + define _ + if: runner.os == 'Linux' && matrix.python != '3.6' + run: sudo apt-get install libboost-dev + + - name: Setup Boost (macOS) + if: runner.os == 'macOS' + run: brew install boost - name: Update CMake - uses: jwlawson/actions-setup-cmake@v1.7 + uses: jwlawson/actions-setup-cmake@v1.13 - name: Cache wheels if: runner.os == 'macOS' - uses: actions/cache@v2 + uses: actions/cache@v3 with: # This path is specific to macOS - we really only need it for PyPy NumPy wheels # See https://github.com/actions/cache/blob/master/examples.md#python---pip @@ -85,17 +96,20 @@ jobs: key: ${{ runner.os }}-pip-${{ matrix.python }}-x64-${{ hashFiles('tests/requirements.txt') }} - name: Prepare env - run: python -m pip install -r tests/requirements.txt --prefer-binary + run: | + python -m pip install -r tests/requirements.txt - name: Setup annotations on Linux if: runner.os == 'Linux' run: python -m pip install pytest-github-actions-annotate-failures # First build - C++11 mode and inplace + # More-or-less randomly adding -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON here. - name: Configure C++11 ${{ matrix.args }} run: > cmake -S . -B . 
-DPYBIND11_WERROR=ON + -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON -DCMAKE_CXX_STANDARD=11 @@ -109,7 +123,7 @@ jobs: - name: C++11 tests # TODO: Figure out how to load the DLL on Python 3.8+ - if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10-dev'))" + if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10' || matrix.python == '3.11' || matrix.python == 'pypy-3.8'))" run: cmake --build . --target cpptest -j 2 - name: Interface test C++11 @@ -119,15 +133,16 @@ jobs: run: git clean -fdx # Second build - C++17 mode and in a build directory - - name: Configure ${{ matrix.args2 }} + # More-or-less randomly adding -DPYBIND11_SIMPLE_GIL_MANAGEMENT=OFF here. + - name: Configure C++17 run: > cmake -S . -B build2 -DPYBIND11_WERROR=ON + -DPYBIND11_SIMPLE_GIL_MANAGEMENT=OFF -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON -DCMAKE_CXX_STANDARD=17 ${{ matrix.args }} - ${{ matrix.args2 }} - name: Build run: cmake --build build2 -j 2 @@ -137,32 +152,35 @@ jobs: - name: C++ tests # TODO: Figure out how to load the DLL on Python 3.8+ - if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10-dev'))" + if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10' || matrix.python == '3.11' || matrix.python == 'pypy-3.8'))" run: cmake --build build2 --target cpptest + # Third build - C++17 mode with unstable ABI + - name: Configure (unstable ABI) + run: > + cmake -S . 
-B build3 + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=17 + -DPYBIND11_INTERNALS_VERSION=10000000 + "-DPYBIND11_TEST_OVERRIDE=test_call_policies.cpp;test_gil_scoped.cpp;test_thread.cpp" + ${{ matrix.args }} + + - name: Build (unstable ABI) + run: cmake --build build3 -j 2 + + - name: Python tests (unstable ABI) + run: cmake --build build3 --target pytest + - name: Interface test run: cmake --build build2 --target test_cmake_build - # Eventually Microsoft might have an action for setting up - # MSVC, but for now, this action works: - - name: Prepare compiler environment for Windows 🐍 2.7 - if: matrix.python == 2.7 && runner.os == 'Windows' - uses: ilammy/msvc-dev-cmd@v1 - with: - arch: x64 - - # This makes two environment variables available in the following step(s) - - name: Set Windows 🐍 2.7 environment variables - if: matrix.python == 2.7 && runner.os == 'Windows' - shell: bash - run: | - echo "DISTUTILS_USE_SDK=1" >> $GITHUB_ENV - echo "MSSdk=1" >> $GITHUB_ENV - # This makes sure the setup_helpers module can build packages using # setuptools - name: Setuptools helpers test run: pytest tests/extra_setuptools + if: "!(matrix.runs-on == 'windows-2022')" deadsnakes: @@ -170,30 +188,31 @@ jobs: fail-fast: false matrix: include: - - python-version: 3.9 + # TODO: Fails on 3.10, investigate + - python-version: "3.9" python-debug: true valgrind: true - - python-version: 3.10-dev + - python-version: "3.11" python-debug: false name: "🐍 ${{ matrix.python-version }}${{ matrix.python-debug && '-dbg' || '' }} (deadsnakes)${{ matrix.valgrind && ' • Valgrind' || '' }} • x64" - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup Python ${{ matrix.python-version }} (deadsnakes) - uses: deadsnakes/action@v2.1.1 + uses: deadsnakes/action@v3.0.0 with: python-version: ${{ matrix.python-version }} debug: ${{ matrix.python-debug }} - name: Update CMake - uses: 
jwlawson/actions-setup-cmake@v1.7 + uses: jwlawson/actions-setup-cmake@v1.13 - name: Valgrind cache if: matrix.valgrind - uses: actions/cache@v2 + uses: actions/cache@v3 id: cache-valgrind with: path: valgrind @@ -218,9 +237,12 @@ jobs: sudo apt-get install libc6-dbg # Needed by Valgrind - name: Prepare env - run: python -m pip install -r tests/requirements.txt --prefer-binary + run: | + python -m pip install -r tests/requirements.txt - name: Configure + env: + SETUPTOOLS_USE_DISTUTILS: stdlib run: > cmake -S . -B build -DCMAKE_BUILD_TYPE=Debug @@ -261,16 +283,22 @@ jobs: include: - clang: 5 std: 14 - - clang: 10 - std: 20 - clang: 10 std: 17 + - clang: 11 + std: 20 + - clang: 12 + std: 20 + - clang: 13 + std: 20 + - clang: 14 + std: 20 name: "🐍 3 • Clang ${{ matrix.clang }} • C++${{ matrix.std }} • x64" container: "silkeh/clang:${{ matrix.clang }}" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Add wget and python3 run: apt-get update && apt-get install -y python3-dev python3-numpy python3-pytest libeigen3-dev @@ -300,11 +328,11 @@ jobs: # Testing NVCC; forces sources to behave like .cu files cuda: runs-on: ubuntu-latest - name: "🐍 3.8 • CUDA 11 • Ubuntu 20.04" - container: nvidia/cuda:11.0-devel-ubuntu20.04 + name: "🐍 3.10 • CUDA 11.7 • Ubuntu 22.04" + container: nvidia/cuda:11.7.0-devel-ubuntu22.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # tzdata will try to ask for the timezone, so set the DEBIAN_FRONTEND - name: Install 🐍 3 @@ -328,7 +356,7 @@ jobs: # container: centos:8 # # steps: -# - uses: actions/checkout@v2 +# - uses: actions/checkout@v3 # # - name: Add Python 3 and a few requirements # run: yum update -y && yum install -y git python3-devel python3-numpy python3-pytest make environment-modules @@ -367,32 +395,32 @@ jobs: # Testing on CentOS 7 + PGI compilers, which seems to require more workarounds centos-nvhpc7: runs-on: ubuntu-latest - name: "🐍 3 • CentOS7 / PGI 20.9 • x64" + name: "🐍 3 • CentOS7 / 
PGI 22.9 • x64" container: centos:7 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Add Python 3 and a few requirements - run: yum update -y && yum install -y epel-release && yum install -y git python3-devel make environment-modules cmake3 + run: yum update -y && yum install -y epel-release && yum install -y git python3-devel make environment-modules cmake3 yum-utils - name: Install NVidia HPC SDK - run: yum -y install https://developer.download.nvidia.com/hpc-sdk/20.9/nvhpc-20-9-20.9-1.x86_64.rpm https://developer.download.nvidia.com/hpc-sdk/20.9/nvhpc-2020-20.9-1.x86_64.rpm + run: yum-config-manager --add-repo https://developer.download.nvidia.com/hpc-sdk/rhel/nvhpc.repo && yum -y install nvhpc-22.9 # On CentOS 7, we have to filter a few tests (compiler internal error) - # and allow deeper templete recursion (not needed on CentOS 8 with a newer + # and allow deeper template recursion (not needed on CentOS 8 with a newer # standard library). On some systems, you many need further workarounds: # https://github.com/pybind/pybind11/pull/2475 - name: Configure shell: bash run: | source /etc/profile.d/modules.sh - module load /opt/nvidia/hpc_sdk/modulefiles/nvhpc/20.9 + module load /opt/nvidia/hpc_sdk/modulefiles/nvhpc/22.9 cmake3 -S . 
-B build -DDOWNLOAD_CATCH=ON \ -DCMAKE_CXX_STANDARD=11 \ -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") \ -DCMAKE_CXX_FLAGS="-Wc,--pending_instantiations=0" \ - -DPYBIND11_TEST_FILTER="test_smart_ptr.cpp;test_virtual_functions.cpp" + -DPYBIND11_TEST_FILTER="test_smart_ptr.cpp" # Building before installing Pip should produce a warning but not an error - name: Build @@ -419,20 +447,20 @@ jobs: strategy: fail-fast: false matrix: - gcc: - - 7 - - latest - std: - - 11 include: - - gcc: 10 - std: 20 + - { gcc: 7, std: 11 } + - { gcc: 7, std: 17 } + - { gcc: 8, std: 14 } + - { gcc: 8, std: 17 } + - { gcc: 10, std: 17 } + - { gcc: 11, std: 20 } + - { gcc: 12, std: 20 } name: "🐍 3 • GCC ${{ matrix.gcc }} • C++${{ matrix.std }}• x64" container: "gcc:${{ matrix.gcc }}" steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v3 - name: Add Python 3 run: apt-get update; apt-get install -y python3-dev python3-numpy python3-pytest python3-pip libeigen3-dev @@ -441,7 +469,7 @@ jobs: run: python3 -m pip install --upgrade pip - name: Update CMake - uses: jwlawson/actions-setup-cmake@v1.7 + uses: jwlawson/actions-setup-cmake@v1.13 - name: Configure shell: bash @@ -474,7 +502,7 @@ jobs: name: "🐍 3 • ICC latest • x64" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Add apt repo run: | @@ -495,7 +523,7 @@ jobs: - name: Install dependencies run: | set +e; source /opt/intel/oneapi/setvars.sh; set -e - python3 -m pip install -r tests/requirements.txt --prefer-binary + python3 -m pip install -r tests/requirements.txt - name: Configure C++11 run: | @@ -569,29 +597,37 @@ jobs: strategy: fail-fast: false matrix: - centos: - - 7 # GCC 4.8 - - 8 + container: + - "centos:7" # GCC 4.8 + - "almalinux:8" + - "almalinux:9" - name: "🐍 3 • CentOS ${{ matrix.centos }} • x64" - container: "centos:${{ matrix.centos }}" + name: "🐍 3 • ${{ matrix.container }} • x64" + container: "${{ matrix.container }}" steps: - - uses: actions/checkout@v2 + - uses: 
actions/checkout@v3 - - name: Add Python 3 + - name: Add Python 3 (RHEL 7) + if: matrix.container == 'centos:7' run: yum update -y && yum install -y python3-devel gcc-c++ make git + - name: Add Python 3 (RHEL 8+) + if: matrix.container != 'centos:7' + run: dnf update -y && dnf install -y python3-devel gcc-c++ make git + - name: Update pip run: python3 -m pip install --upgrade pip - name: Install dependencies - run: python3 -m pip install cmake -r tests/requirements.txt --prefer-binary + run: | + python3 -m pip install cmake -r tests/requirements.txt - name: Configure shell: bash run: > cmake -S . -B build + -DCMAKE_BUILD_TYPE=MinSizeRel -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON @@ -613,18 +649,18 @@ jobs: # This tests an "install" with the CMake tools install-classic: - name: "🐍 3.5 • Debian • x86 • Install" + name: "🐍 3.7 • Debian • x86 • Install" runs-on: ubuntu-latest - container: i386/debian:stretch + container: i386/debian:buster steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v1 # Required to run inside docker - name: Install requirements run: | apt-get update apt-get install -y git make cmake g++ libeigen3-dev python3-dev python3-pip - pip3 install "pytest==3.1.*" + pip3 install "pytest==6.*" - name: Configure for install run: > @@ -649,33 +685,32 @@ jobs: -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") working-directory: /build-tests - - name: Run tests + - name: Python tests run: make pytest -j 2 working-directory: /build-tests # This verifies that the documentation is not horribly broken, and does a - # basic sanity check on the SDist. + # basic validation check on the SDist. 
doxygen: name: "Documentation build test" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 + with: + python-version: "3.x" - name: Install Doxygen run: sudo apt-get install -y doxygen librsvg2-bin # Changed to rsvg-convert in 20.04 - - name: Install docs & setup requirements - run: python3 -m pip install -r docs/requirements.txt - - name: Build docs - run: python3 -m sphinx -W -b html docs docs/.build + run: pipx run nox -s docs - name: Make SDist - run: python3 setup.py sdist + run: pipx run nox -s build -- --sdist - run: git status --ignored @@ -687,7 +722,7 @@ jobs: - name: Compare Dists (headers only) working-directory: include run: | - python3 -m pip install --user -U ../dist/* + python3 -m pip install --user -U ../dist/*.tar.gz installed=$(python3 -c "import pybind11; print(pybind11.get_include() + '/pybind11')") diff -rq $installed ./pybind11 @@ -696,42 +731,43 @@ jobs: fail-fast: false matrix: python: - - 3.5 - 3.6 - 3.7 - 3.8 - 3.9 - - pypy3 - # TODO: fix hang on pypy2 include: - python: 3.9 - args: -DCMAKE_CXX_STANDARD=20 -DDOWNLOAD_EIGEN=OFF + args: -DCMAKE_CXX_STANDARD=20 - python: 3.8 args: -DCMAKE_CXX_STANDARD=17 + - python: 3.7 + args: -DCMAKE_CXX_STANDARD=14 + name: "🐍 ${{ matrix.python }} • MSVC 2019 • x86 ${{ matrix.args }}" - runs-on: windows-latest + runs-on: windows-2019 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup Python ${{ matrix.python }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} architecture: x86 - name: Update CMake - uses: jwlawson/actions-setup-cmake@v1.7 + uses: jwlawson/actions-setup-cmake@v1.13 - name: Prepare MSVC - uses: ilammy/msvc-dev-cmd@v1 + uses: ilammy/msvc-dev-cmd@v1.12.0 with: arch: x86 - name: Prepare env - run: python -m pip install -r tests/requirements.txt --prefer-binary + run: | + python -m pip install -r 
tests/requirements.txt # First build - C++11 mode and inplace - name: Configure ${{ matrix.args }} @@ -745,102 +781,324 @@ jobs: - name: Build C++11 run: cmake --build build -j 2 - - name: Run tests + - name: Python tests run: cmake --build build -t pytest - win32-msvc2015: - name: "🐍 ${{ matrix.python }} • MSVC 2015 • x64" - runs-on: windows-latest + win32-debug: strategy: fail-fast: false matrix: python: - - 2.7 - - 3.6 - - 3.7 - # todo: check/cpptest does not support 3.8+ yet - - steps: - - uses: actions/checkout@v2 - - - name: Setup 🐍 ${{ matrix.python }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python }} - - - name: Update CMake - uses: jwlawson/actions-setup-cmake@v1.7 - - - name: Prepare MSVC - uses: ilammy/msvc-dev-cmd@v1 - with: - toolset: 14.0 - - - name: Prepare env - run: python -m pip install -r tests/requirements.txt --prefer-binary - - # First build - C++11 mode and inplace - - name: Configure - run: > - cmake -S . -B build - -G "Visual Studio 14 2015" -A x64 - -DPYBIND11_WERROR=ON - -DDOWNLOAD_CATCH=ON - -DDOWNLOAD_EIGEN=ON - - - name: Build C++14 - run: cmake --build build -j 2 - - - name: Run all checks - run: cmake --build build -t check - - - win32-msvc2017: - name: "🐍 ${{ matrix.python }} • MSVC 2017 • x64" - runs-on: windows-2016 - strategy: - fail-fast: false - matrix: - python: - - 2.7 - - 3.5 - - 3.7 - std: - - 14 + - 3.8 + - 3.9 include: - - python: 2.7 - std: 17 - args: > - -DCMAKE_CXX_FLAGS="/permissive- /EHsc /GR" + - python: 3.9 + args: -DCMAKE_CXX_STANDARD=20 + - python: 3.8 + args: -DCMAKE_CXX_STANDARD=17 + + name: "🐍 ${{ matrix.python }} • MSVC 2019 (Debug) • x86 ${{ matrix.args }}" + runs-on: windows-2019 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - name: Setup 🐍 ${{ matrix.python }} - uses: actions/setup-python@v2 + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} + architecture: x86 - name: Update CMake - 
uses: jwlawson/actions-setup-cmake@v1.7 + uses: jwlawson/actions-setup-cmake@v1.13 + + - name: Prepare MSVC + uses: ilammy/msvc-dev-cmd@v1.12.0 + with: + arch: x86 - name: Prepare env - run: python -m pip install -r tests/requirements.txt --prefer-binary + run: | + python -m pip install -r tests/requirements.txt # First build - C++11 mode and inplace - - name: Configure + - name: Configure ${{ matrix.args }} run: > cmake -S . -B build - -G "Visual Studio 15 2017" -A x64 + -G "Visual Studio 16 2019" -A Win32 + -DCMAKE_BUILD_TYPE=Debug -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON - -DCMAKE_CXX_STANDARD=${{ matrix.std }} ${{ matrix.args }} + - name: Build C++11 + run: cmake --build build --config Debug -j 2 - - name: Build ${{ matrix.std }} + - name: Python tests + run: cmake --build build --config Debug -t pytest + + + windows-2022: + strategy: + fail-fast: false + matrix: + python: + - 3.9 + + name: "🐍 ${{ matrix.python }} • MSVC 2022 C++20 • x64" + runs-on: windows-2022 + + steps: + - uses: actions/checkout@v3 + + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + + - name: Prepare env + run: | + python3 -m pip install -r tests/requirements.txt + + - name: Update CMake + uses: jwlawson/actions-setup-cmake@v1.13 + + - name: Configure C++20 + run: > + cmake -S . 
-B build + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=20 + + - name: Build C++20 run: cmake --build build -j 2 - - name: Run all checks - run: cmake --build build -t check + - name: Python tests + run: cmake --build build --target pytest + + - name: C++20 tests + run: cmake --build build --target cpptest -j 2 + + - name: Interface test C++20 + run: cmake --build build --target test_cmake_build + + mingw: + name: "🐍 3 • windows-latest • ${{ matrix.sys }}" + runs-on: windows-latest + defaults: + run: + shell: msys2 {0} + strategy: + fail-fast: false + matrix: + include: + - { sys: mingw64, env: x86_64 } + - { sys: mingw32, env: i686 } + steps: + - uses: msys2/setup-msys2@v2 + with: + msystem: ${{matrix.sys}} + install: >- + git + mingw-w64-${{matrix.env}}-gcc + mingw-w64-${{matrix.env}}-python-pip + mingw-w64-${{matrix.env}}-python-numpy + mingw-w64-${{matrix.env}}-python-scipy + mingw-w64-${{matrix.env}}-cmake + mingw-w64-${{matrix.env}}-make + mingw-w64-${{matrix.env}}-python-pytest + mingw-w64-${{matrix.env}}-eigen3 + mingw-w64-${{matrix.env}}-boost + mingw-w64-${{matrix.env}}-catch + + - uses: actions/checkout@v3 + + - name: Configure C++11 + # LTO leads to many undefined reference like + # `pybind11::detail::function_call::function_call(pybind11::detail::function_call&&) + run: cmake -G "MinGW Makefiles" -DCMAKE_CXX_STANDARD=11 -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -S . 
-B build + + - name: Build C++11 + run: cmake --build build -j 2 + + - name: Python tests C++11 + run: cmake --build build --target pytest -j 2 + + - name: C++11 tests + run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build --target cpptest -j 2 + + - name: Interface test C++11 + run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build --target test_cmake_build + + - name: Clean directory + run: git clean -fdx + + - name: Configure C++14 + run: cmake -G "MinGW Makefiles" -DCMAKE_CXX_STANDARD=14 -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -S . -B build2 + + - name: Build C++14 + run: cmake --build build2 -j 2 + + - name: Python tests C++14 + run: cmake --build build2 --target pytest -j 2 + + - name: C++14 tests + run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build2 --target cpptest -j 2 + + - name: Interface test C++14 + run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build2 --target test_cmake_build + + - name: Clean directory + run: git clean -fdx + + - name: Configure C++17 + run: cmake -G "MinGW Makefiles" -DCMAKE_CXX_STANDARD=17 -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -S . 
-B build3 + + - name: Build C++17 + run: cmake --build build3 -j 2 + + - name: Python tests C++17 + run: cmake --build build3 --target pytest -j 2 + + - name: C++17 tests + run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build3 --target cpptest -j 2 + + - name: Interface test C++17 + run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build3 --target test_cmake_build + + windows_clang: + + strategy: + matrix: + os: [windows-latest] + python: ['3.10'] + + runs-on: "${{ matrix.os }}" + + name: "🐍 ${{ matrix.python }} • ${{ matrix.os }} • clang-latest" + + steps: + - name: Show env + run: env + + - name: Checkout + uses: actions/checkout@v3 + + - name: Set up Clang + uses: egor-tensin/setup-clang@v1 + + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + + - name: Update CMake + uses: jwlawson/actions-setup-cmake@v1.13 + + - name: Install ninja-build tool + uses: seanmiddleditch/gha-setup-ninja@v3 + + - name: Run pip installs + run: | + python -m pip install --upgrade pip + python -m pip install -r tests/requirements.txt + + - name: Show Clang++ version + run: clang++ --version + + - name: Show CMake version + run: cmake --version + + # TODO: WERROR=ON + - name: Configure Clang + run: > + cmake -G Ninja -S . -B . + -DPYBIND11_WERROR=OFF + -DPYBIND11_SIMPLE_GIL_MANAGEMENT=OFF + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_COMPILER=clang++ + -DCMAKE_CXX_STANDARD=17 + + - name: Build + run: cmake --build . -j 2 + + - name: Python tests + run: cmake --build . --target pytest -j 2 + + - name: C++ tests + run: cmake --build . --target cpptest -j 2 + + - name: Interface test + run: cmake --build . 
--target test_cmake_build -j 2 + + - name: Clean directory + run: git clean -fdx + + macos_brew_install_llvm: + name: "macos-latest • brew install llvm" + runs-on: macos-latest + + env: + # https://apple.stackexchange.com/questions/227026/how-to-install-recent-clang-with-homebrew + LDFLAGS: '-L/usr/local/opt/llvm/lib -Wl,-rpath,/usr/local/opt/llvm/lib' + + steps: + - name: Update PATH + run: echo "/usr/local/opt/llvm/bin" >> $GITHUB_PATH + + - name: Show env + run: env + + - name: Checkout + uses: actions/checkout@v3 + + - name: Show Clang++ version before brew install llvm + run: clang++ --version + + - name: brew install llvm + run: brew install llvm + + - name: Show Clang++ version after brew install llvm + run: clang++ --version + + - name: Update CMake + uses: jwlawson/actions-setup-cmake@v1.13 + + - name: Run pip installs + run: | + python3 -m pip install --upgrade pip + python3 -m pip install -r tests/requirements.txt + python3 -m pip install numpy + python3 -m pip install scipy + + - name: Show CMake version + run: cmake --version + + - name: CMake Configure + run: > + cmake -S . -B . + -DPYBIND11_WERROR=ON + -DPYBIND11_SIMPLE_GIL_MANAGEMENT=OFF + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_COMPILER=clang++ + -DCMAKE_CXX_STANDARD=17 + -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") + + - name: Build + run: cmake --build . -j 2 + + - name: Python tests + run: cmake --build . --target pytest -j 2 + + - name: C++ tests + run: cmake --build . --target cpptest -j 2 + + - name: Interface test + run: cmake --build . 
--target test_cmake_build -j 2 + + - name: Clean directory + run: git clean -fdx diff --git a/ext/pybind11/.github/workflows/configure.yml b/ext/pybind11/.github/workflows/configure.yml index 578dba630e..29b041168e 100644 --- a/ext/pybind11/.github/workflows/configure.yml +++ b/ext/pybind11/.github/workflows/configure.yml @@ -9,6 +9,10 @@ on: - stable - v* +env: + # For cmake: + VERBOSE: 1 + jobs: # This tests various versions of CMake in various combinations, to make sure # the configure step passes. @@ -16,12 +20,12 @@ jobs: strategy: fail-fast: false matrix: - runs-on: [ubuntu-latest, macos-latest, windows-latest] + runs-on: [ubuntu-20.04, macos-latest, windows-latest] arch: [x64] - cmake: [3.18] + cmake: ["3.23"] include: - - runs-on: ubuntu-latest + - runs-on: ubuntu-20.04 arch: x64 cmake: 3.4 @@ -29,22 +33,18 @@ jobs: arch: x64 cmake: 3.7 - - runs-on: windows-2016 - arch: x86 - cmake: 3.8 - - - runs-on: windows-2016 - arch: x86 + - runs-on: windows-2019 + arch: x64 # x86 compilers seem to be missing on 2019 image cmake: 3.18 name: 🐍 3.7 • CMake ${{ matrix.cmake }} • ${{ matrix.runs-on }} runs-on: ${{ matrix.runs-on }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup Python 3.7 - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.7 architecture: ${{ matrix.arch }} @@ -55,7 +55,7 @@ jobs: # An action for adding a specific version of CMake: # https://github.com/jwlawson/actions-setup-cmake - name: Setup CMake ${{ matrix.cmake }} - uses: jwlawson/actions-setup-cmake@v1.7 + uses: jwlawson/actions-setup-cmake@v1.13 with: cmake-version: ${{ matrix.cmake }} diff --git a/ext/pybind11/.github/workflows/format.yml b/ext/pybind11/.github/workflows/format.yml index 5cebed17da..b18474bc3d 100644 --- a/ext/pybind11/.github/workflows/format.yml +++ b/ext/pybind11/.github/workflows/format.yml @@ -12,24 +12,35 @@ on: - stable - "v*" +env: + FORCE_COLOR: 3 + # For cmake: + VERBOSE: 1 + jobs: pre-commit: name: 
Format runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - uses: pre-commit/action@v2.0.0 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.x" + - name: Add matchers + run: echo "::add-matcher::$GITHUB_WORKSPACE/.github/matchers/pylint.json" + - uses: pre-commit/action@v3.0.0 with: # Slow hooks are marked with manual - slow is okay here, run them too extra_args: --hook-stage manual --all-files clang-tidy: + # When making changes here, please also review the "Clang-Tidy" section + # in .github/CONTRIBUTING.md and update as needed. name: Clang-Tidy runs-on: ubuntu-latest - container: silkeh/clang:10 + container: silkeh/clang:13 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install requirements run: apt-get update && apt-get install -y python3-dev python3-pytest @@ -37,10 +48,10 @@ jobs: - name: Configure run: > cmake -S . -B build - -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);--warnings-as-errors=*" + -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);--use-color;--warnings-as-errors=*" -DDOWNLOAD_EIGEN=ON -DDOWNLOAD_CATCH=ON -DCMAKE_CXX_STANDARD=17 - name: Build - run: cmake --build build -j 2 + run: cmake --build build -j 2 -- --keep-going diff --git a/ext/pybind11/.github/workflows/labeler.yml b/ext/pybind11/.github/workflows/labeler.yml index d2b5979681..165a2fd87b 100644 --- a/ext/pybind11/.github/workflows/labeler.yml +++ b/ext/pybind11/.github/workflows/labeler.yml @@ -10,7 +10,11 @@ jobs: steps: - uses: actions/labeler@main - if: github.event.pull_request.merged == true + if: > + github.event.pull_request.merged == true && + !startsWith(github.event.pull_request.title, 'chore(deps):') && + !startsWith(github.event.pull_request.title, 'ci(fix):') && + !startsWith(github.event.pull_request.title, 'docs(changelog):') with: repo-token: ${{ secrets.GITHUB_TOKEN }} configuration-path: .github/labeler_merged.yml diff --git 
a/ext/pybind11/.github/workflows/pip.yml b/ext/pybind11/.github/workflows/pip.yml index 4414a12ee4..7c6fc67a3e 100644 --- a/ext/pybind11/.github/workflows/pip.yml +++ b/ext/pybind11/.github/workflows/pip.yml @@ -12,24 +12,28 @@ on: types: - published +env: + PIP_ONLY_BINARY: numpy + jobs: # This builds the sdists and wheels and makes sure the files are exactly as - # expected. Using Windows and Python 2.7, since that is often the most + # expected. Using Windows and Python 3.6, since that is often the most # challenging matrix element. test-packaging: - name: 🐍 2.7 • 📦 tests • windows-latest + name: 🐍 3.6 • 📦 tests • windows-latest runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - name: Setup 🐍 2.7 - uses: actions/setup-python@v2 + - name: Setup 🐍 3.6 + uses: actions/setup-python@v4 with: - python-version: 2.7 + python-version: 3.6 - name: Prepare env - run: python -m pip install -r tests/requirements.txt --prefer-binary + run: | + python -m pip install -r tests/requirements.txt - name: Python Packaging tests run: pytest tests/extra_python_package/ @@ -42,15 +46,16 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup 🐍 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.8 - name: Prepare env - run: python -m pip install -r tests/requirements.txt build twine --prefer-binary + run: | + python -m pip install -r tests/requirements.txt build twine - name: Python Packaging tests run: pytest tests/extra_python_package/ @@ -64,13 +69,13 @@ jobs: run: twine check dist/* - name: Save standard package - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: standard path: dist/pybind11-* - name: Save global package - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: global path: dist/pybind11_global-* @@ -85,19 +90,21 @@ jobs: needs: [packaging] steps: - - uses: 
actions/setup-python@v2 + - uses: actions/setup-python@v4 + with: + python-version: "3.x" # Downloads all to directories matching the artifact names - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 - name: Publish standard package - uses: pypa/gh-action-pypi-publish@v1.4.1 + uses: pypa/gh-action-pypi-publish@v1.6.4 with: password: ${{ secrets.pypi_password }} packages_dir: standard/ - name: Publish global package - uses: pypa/gh-action-pypi-publish@v1.4.1 + uses: pypa/gh-action-pypi-publish@v1.6.4 with: password: ${{ secrets.pypi_password_global }} packages_dir: global/ diff --git a/ext/pybind11/.github/workflows/upstream.yml b/ext/pybind11/.github/workflows/upstream.yml new file mode 100644 index 0000000000..a15861ee47 --- /dev/null +++ b/ext/pybind11/.github/workflows/upstream.yml @@ -0,0 +1,114 @@ + +name: Upstream + +on: + workflow_dispatch: + pull_request: + +concurrency: + group: upstream-${{ github.ref }} + cancel-in-progress: true + +env: + PIP_ONLY_BINARY: numpy + # For cmake: + VERBOSE: 1 + +jobs: + standard: + name: "🐍 3.11 latest internals • ubuntu-latest • x64" + runs-on: ubuntu-latest + if: "contains(github.event.pull_request.labels.*.name, 'python dev')" + + steps: + - uses: actions/checkout@v3 + + - name: Setup Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: "3.11-dev" + + - name: Setup Boost (Linux) + if: runner.os == 'Linux' + run: sudo apt-get install libboost-dev + + - name: Update CMake + uses: jwlawson/actions-setup-cmake@v1.13 + + - name: Prepare env + run: | + python -m pip install -r tests/requirements.txt + + - name: Setup annotations on Linux + if: runner.os == 'Linux' + run: python -m pip install pytest-github-actions-annotate-failures + + # First build - C++11 mode and inplace + - name: Configure C++11 + run: > + cmake -S . -B . + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=11 + + - name: Build C++11 + run: cmake --build . 
-j 2 + + - name: Python tests C++11 + run: cmake --build . --target pytest -j 2 + + - name: C++11 tests + run: cmake --build . --target cpptest -j 2 + + - name: Interface test C++11 + run: cmake --build . --target test_cmake_build + + - name: Clean directory + run: git clean -fdx + + # Second build - C++17 mode and in a build directory + - name: Configure C++17 + run: > + cmake -S . -B build2 + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=17 + ${{ matrix.args }} + ${{ matrix.args2 }} + + - name: Build + run: cmake --build build2 -j 2 + + - name: Python tests + run: cmake --build build2 --target pytest + + - name: C++ tests + run: cmake --build build2 --target cpptest + + # Third build - C++17 mode with unstable ABI + - name: Configure (unstable ABI) + run: > + cmake -S . -B build3 + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=17 + -DPYBIND11_INTERNALS_VERSION=10000000 + "-DPYBIND11_TEST_OVERRIDE=test_call_policies.cpp;test_gil_scoped.cpp;test_thread.cpp" + ${{ matrix.args }} + + - name: Build (unstable ABI) + run: cmake --build build3 -j 2 + + - name: Python tests (unstable ABI) + run: cmake --build build3 --target pytest + + - name: Interface test + run: cmake --build build3 --target test_cmake_build + + # This makes sure the setup_helpers module can build packages using + # setuptools + - name: Setuptools helpers test + run: pytest tests/extra_setuptools diff --git a/ext/pybind11/.gitignore b/ext/pybind11/.gitignore index 3f36b89e0c..43d5094c96 100644 --- a/ext/pybind11/.gitignore +++ b/ext/pybind11/.gitignore @@ -41,3 +41,6 @@ pybind11Targets.cmake /.vscode /pybind11/include/* /pybind11/share/* +/docs/_build/* +.ipynb_checkpoints/ +tests/main.cpp diff --git a/ext/pybind11/.pre-commit-config.yaml b/ext/pybind11/.pre-commit-config.yaml index 6781ac4f11..d625d5726b 100644 --- a/ext/pybind11/.pre-commit-config.yaml +++ b/ext/pybind11/.pre-commit-config.yaml @@ -12,49 +12,118 @@ # 
# See https://github.com/pre-commit/pre-commit + +ci: + autoupdate_commit_msg: "chore(deps): update pre-commit hooks" + autofix_commit_msg: "style: pre-commit fixes" + autoupdate_schedule: monthly + +# third-party content +exclude: ^tools/JoinPaths.cmake$ + repos: # Standard hooks - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: "v4.4.0" hooks: - id: check-added-large-files - id: check-case-conflict + - id: check-docstring-first - id: check-merge-conflict - id: check-symlinks + - id: check-toml - id: check-yaml - id: debug-statements - id: end-of-file-fixer - id: mixed-line-ending - id: requirements-txt-fixer - id: trailing-whitespace - - id: fix-encoding-pragma + +# Upgrade old Python syntax +- repo: https://github.com/asottile/pyupgrade + rev: "v3.3.1" + hooks: + - id: pyupgrade + args: [--py36-plus] + +# Nicely sort includes +- repo: https://github.com/PyCQA/isort + rev: "5.11.4" + hooks: + - id: isort # Black, the code formatter, natively supports pre-commit - repo: https://github.com/psf/black - rev: 20.8b1 + rev: "22.12.0" # Keep in sync with blacken-docs hooks: - id: black - # By default, this ignores pyi files, though black supports them - types: [text] - files: \.pyi?$ + +# Also code format the docs +- repo: https://github.com/asottile/blacken-docs + rev: "v1.12.1" + hooks: + - id: blacken-docs + additional_dependencies: + - black==22.10.0 # keep in sync with black hook # Changes tabs to spaces - repo: https://github.com/Lucas-C/pre-commit-hooks - rev: v1.1.9 + rev: "v1.3.1" hooks: - id: remove-tabs +- repo: https://github.com/sirosen/texthooks + rev: "0.4.0" + hooks: + - id: fix-ligatures + - id: fix-smartquotes + +# Autoremoves unused imports +- repo: https://github.com/hadialqattan/pycln + rev: "v2.1.2" + hooks: + - id: pycln + stages: [manual] + +# Checking for common mistakes +- repo: https://github.com/pre-commit/pygrep-hooks + rev: "v1.9.0" + hooks: + - id: python-check-blanket-noqa + - id: python-check-blanket-type-ignore 
+ - id: python-no-log-warn + - id: python-use-type-annotations + - id: rst-backticks + - id: rst-directive-colons + - id: rst-inline-touching-normal + +# Automatically remove noqa that are not used +- repo: https://github.com/asottile/yesqa + rev: "v1.4.0" + hooks: + - id: yesqa + additional_dependencies: &flake8_dependencies + - flake8-bugbear + - pep8-naming + # Flake8 also supports pre-commit natively (same author) -- repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.4 +- repo: https://github.com/PyCQA/flake8 + rev: "6.0.0" hooks: - id: flake8 - additional_dependencies: [flake8-bugbear, pep8-naming] exclude: ^(docs/.*|tools/.*)$ + additional_dependencies: *flake8_dependencies + +# PyLint has native support - not always usable, but works for us +- repo: https://github.com/PyCQA/pylint + rev: "v2.15.9" + hooks: + - id: pylint + files: ^pybind11 # CMake formatting - repo: https://github.com/cheshirekow/cmake-format-precommit - rev: v0.6.13 + rev: "v0.6.13" hooks: - id: cmake-format additional_dependencies: [pyyaml] @@ -63,38 +132,50 @@ repos: # Check static types with mypy - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.800 + rev: "v0.991" hooks: - id: mypy - # The default Python type ignores .pyi files, so let's rerun if detected - types: [text] - files: ^pybind11.*\.pyi?$ - # Running per-file misbehaves a bit, so just run on all files, it's fast - pass_filenames: false + args: [] + exclude: ^(tests|docs)/ + additional_dependencies: [nox, rich] # Checks the manifest for missing files (native support) - repo: https://github.com/mgedmin/check-manifest - rev: "0.46" + rev: "0.49" hooks: - id: check-manifest # This is a slow hook, so only run this if --hook-stage manual is passed stages: [manual] additional_dependencies: [cmake, ninja] -# The original pybind11 checks for a few C++ style items +# Check for spelling +# Use tools/codespell_ignore_lines_from_errors.py +# to rebuild .codespell-ignore-lines +- repo: https://github.com/codespell-project/codespell 
+ rev: "v2.2.2" + hooks: + - id: codespell + exclude: ".supp$" + args: ["-x", ".codespell-ignore-lines"] + +# Check for common shell mistakes +- repo: https://github.com/shellcheck-py/shellcheck-py + rev: "v0.9.0.2" + hooks: + - id: shellcheck + +# Disallow some common capitalization mistakes - repo: local hooks: - id: disallow-caps name: Disallow improper capitalization language: pygrep - entry: PyBind|Numpy|Cmake|CCache - exclude: .pre-commit-config.yaml + entry: PyBind|Numpy|Cmake|CCache|PyTest + exclude: ^\.pre-commit-config.yaml$ -- repo: local +# Clang format the codebase automatically +- repo: https://github.com/pre-commit/mirrors-clang-format + rev: "v15.0.6" hooks: - - id: check-style - name: Classic check-style - language: system - types: - - c++ - entry: ./tools/check-style.sh + - id: clang-format + types_or: [c++, c, cuda] diff --git a/ext/pybind11/CMakeLists.txt b/ext/pybind11/CMakeLists.txt index 2e81869c3f..0d93203881 100644 --- a/ext/pybind11/CMakeLists.txt +++ b/ext/pybind11/CMakeLists.txt @@ -16,6 +16,11 @@ else() cmake_policy(VERSION 3.22) endif() +# Avoid infinite recursion if tests include this as a subdirectory +if(DEFINED PYBIND11_MASTER_PROJECT) + return() +endif() + # Extract project version from source file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/include/pybind11/detail/common.h" pybind11_version_defines REGEX "#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) ") @@ -45,13 +50,8 @@ if(NOT pybind11_FIND_QUIETLY) message(STATUS "pybind11 v${pybind11_VERSION} ${pybind11_VERSION_TYPE}") endif() -# Avoid infinite recursion if tests include this as a subdirectory -if(DEFINED PYBIND11_MASTER_PROJECT) - set(PYBIND11_TEST OFF) -endif() - # Check if pybind11 is being used directly or via add_subdirectory -if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR AND NOT DEFINED PYBIND11_MASTER_PROJECT) +if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) ### Warn if not an out-of-source builds if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR) set(lines @@ 
-80,6 +80,8 @@ if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR AND NOT DEFINED PYBIND11_MASTER_ endif() set(pybind11_system "") + + set_property(GLOBAL PROPERTY USE_FOLDERS ON) else() set(PYBIND11_MASTER_PROJECT OFF) set(pybind11_system SYSTEM) @@ -89,10 +91,16 @@ endif() option(PYBIND11_INSTALL "Install pybind11 header files?" ${PYBIND11_MASTER_PROJECT}) option(PYBIND11_TEST "Build pybind11 test suite?" ${PYBIND11_MASTER_PROJECT}) option(PYBIND11_NOPYTHON "Disable search for Python" OFF) +option(PYBIND11_SIMPLE_GIL_MANAGEMENT + "Use simpler GIL management logic that does not support disassociation" OFF) set(PYBIND11_INTERNALS_VERSION "" CACHE STRING "Override the ABI version, may be used to enable the unstable ABI.") +if(PYBIND11_SIMPLE_GIL_MANAGEMENT) + add_compile_definitions(PYBIND11_SIMPLE_GIL_MANAGEMENT) +endif() + cmake_dependent_option( USE_PYTHON_INCLUDE_DIR "Install pybind11 headers in Python include directory instead of default installation prefix" @@ -118,6 +126,8 @@ set(PYBIND11_HEADERS include/pybind11/complex.h include/pybind11/options.h include/pybind11/eigen.h + include/pybind11/eigen/matrix.h + include/pybind11/eigen/tensor.h include/pybind11/embed.h include/pybind11/eval.h include/pybind11/gil.h @@ -196,6 +206,9 @@ else() endif() include("${CMAKE_CURRENT_SOURCE_DIR}/tools/pybind11Common.cmake") +# https://github.com/jtojnar/cmake-snips/#concatenating-paths-when-building-pkg-config-files +# TODO: cmake 3.20 adds the cmake_path() function, which obsoletes this snippet +include("${CMAKE_CURRENT_SOURCE_DIR}/tools/JoinPaths.cmake") # Relative directory setting if(USE_PYTHON_INCLUDE_DIR AND DEFINED Python_INCLUDE_DIRS) @@ -260,6 +273,16 @@ if(PYBIND11_INSTALL) NAMESPACE "pybind11::" DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR}) + # pkg-config support + if(NOT prefix_for_pc_file) + set(prefix_for_pc_file "${CMAKE_INSTALL_PREFIX}") + endif() + join_paths(includedir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") + 
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tools/pybind11.pc.in" + "${CMAKE_CURRENT_BINARY_DIR}/pybind11.pc" @ONLY) + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/pybind11.pc" + DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/pkgconfig/") + # Uninstall target if(PYBIND11_MASTER_PROJECT) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tools/cmake_uninstall.cmake.in" diff --git a/ext/pybind11/MANIFEST.in b/ext/pybind11/MANIFEST.in index aed183e874..033303a74a 100644 --- a/ext/pybind11/MANIFEST.in +++ b/ext/pybind11/MANIFEST.in @@ -1,6 +1,5 @@ recursive-include pybind11/include/pybind11 *.h recursive-include pybind11 *.py recursive-include pybind11 py.typed -recursive-include pybind11 *.pyi include pybind11/share/cmake/pybind11/*.cmake include LICENSE README.rst pyproject.toml setup.py setup.cfg diff --git a/ext/pybind11/README.rst b/ext/pybind11/README.rst index 7ce57b03ae..3c75edb575 100644 --- a/ext/pybind11/README.rst +++ b/ext/pybind11/README.rst @@ -32,9 +32,9 @@ this heavy machinery has become an excessively large and unnecessary dependency. Think of this library as a tiny self-contained version of Boost.Python -with everything stripped away that isn’t relevant for binding +with everything stripped away that isn't relevant for binding generation. Without comments, the core header files only require ~4K -lines of code and depend on Python (2.7 or 3.5+, or PyPy) and the C++ +lines of code and depend on Python (3.6+, or PyPy) and the C++ standard library. This compact implementation was possible thanks to some of the new C++11 language features (specifically: tuples, lambda functions and variadic templates). Since its creation, this library has @@ -78,8 +78,8 @@ Goodies In addition to the core functionality, pybind11 provides some extra goodies: -- Python 2.7, 3.5+, and PyPy/PyPy3 7.3 are supported with an - implementation-agnostic interface. 
+- Python 3.6+, and PyPy3 7.3 are supported with an implementation-agnostic + interface (pybind11 2.9 was the last version to support Python 2 and 3.5). - It is possible to bind C++11 lambda functions with captured variables. The lambda capture data is stored inside the resulting @@ -88,8 +88,8 @@ goodies: - pybind11 uses C++11 move constructors and move assignment operators whenever possible to efficiently transfer custom data types. -- It’s easy to expose the internal storage of custom data types through - Pythons’ buffer protocols. This is handy e.g. for fast conversion +- It's easy to expose the internal storage of custom data types through + Python's buffer protocol. This is handy e.g. for fast conversion between C++ matrix classes like Eigen and NumPy without expensive copy operations. @@ -106,7 +106,7 @@ goodies: - Binaries are generally smaller by a factor of at least 2 compared to equivalent bindings generated by Boost.Python. A recent pybind11 conversion of PyRosetta, an enormous Boost.Python binding project, - `reported `_ + `reported `_ a binary size reduction of **5.4x** and compile time reduction by **5.8x**. @@ -119,10 +119,10 @@ goodies: Supported compilers ------------------- -1. Clang/LLVM 3.3 or newer (for Apple Xcode’s clang, this is 5.0.0 or +1. Clang/LLVM 3.3 or newer (for Apple Xcode's clang, this is 5.0.0 or newer) 2. GCC 4.8 or newer -3. Microsoft Visual Studio 2015 Update 3 or newer +3. Microsoft Visual Studio 2017 or newer 4. Intel classic C++ compiler 18 or newer (ICC 20.2 tested in CI) 5. Cygwin/GCC (previously tested on 2.5.1) 6.
NVCC (CUDA 11.0 tested in CI) diff --git a/ext/pybind11/docs/Doxyfile b/ext/pybind11/docs/Doxyfile index c8562952ef..09138db364 100644 --- a/ext/pybind11/docs/Doxyfile +++ b/ext/pybind11/docs/Doxyfile @@ -18,6 +18,4 @@ ALIASES += "endrst=\endverbatim" QUIET = YES WARNINGS = YES WARN_IF_UNDOCUMENTED = NO -PREDEFINED = DOXYGEN_SHOULD_SKIP_THIS \ - PY_MAJOR_VERSION=3 \ - PYBIND11_NOINLINE +PREDEFINED = PYBIND11_NOINLINE diff --git a/ext/pybind11/docs/_static/css/custom.css b/ext/pybind11/docs/_static/css/custom.css new file mode 100644 index 0000000000..7a49a6ac4f --- /dev/null +++ b/ext/pybind11/docs/_static/css/custom.css @@ -0,0 +1,3 @@ +.highlight .go { + color: #707070; +} diff --git a/ext/pybind11/docs/_static/theme_overrides.css b/ext/pybind11/docs/_static/theme_overrides.css deleted file mode 100644 index 1071809fa0..0000000000 --- a/ext/pybind11/docs/_static/theme_overrides.css +++ /dev/null @@ -1,11 +0,0 @@ -.wy-table-responsive table td, -.wy-table-responsive table th { - white-space: initial !important; -} -.rst-content table.docutils td { - vertical-align: top !important; -} -div[class^='highlight'] pre { - white-space: pre; - white-space: pre-wrap; -} diff --git a/ext/pybind11/docs/advanced/cast/custom.rst b/ext/pybind11/docs/advanced/cast/custom.rst index 19b9353476..8138cac619 100644 --- a/ext/pybind11/docs/advanced/cast/custom.rst +++ b/ext/pybind11/docs/advanced/cast/custom.rst @@ -38,7 +38,7 @@ type is explicitly allowed. .. code-block:: cpp - namespace pybind11 { namespace detail { + namespace PYBIND11_NAMESPACE { namespace detail { template <> struct type_caster<inty> { public: /** @@ -46,7 +46,7 @@ type is explicitly allowed. * function signatures and declares a local variable * 'value' of type inty */ - PYBIND11_TYPE_CASTER(inty, _("inty")); + PYBIND11_TYPE_CASTER(inty, const_name("inty")); /** * Conversion part 1 (Python->C++): convert a PyObject into a inty @@ -78,7 +78,7 @@ type is explicitly allowed.
return PyLong_FromLong(src.long_value); } }; - }} // namespace pybind11::detail + }} // namespace PYBIND11_NAMESPACE::detail .. note:: diff --git a/ext/pybind11/docs/advanced/cast/eigen.rst b/ext/pybind11/docs/advanced/cast/eigen.rst index 80f1013430..a5c11a3f14 100644 --- a/ext/pybind11/docs/advanced/cast/eigen.rst +++ b/ext/pybind11/docs/advanced/cast/eigen.rst @@ -52,7 +52,7 @@ can be mapped *and* if the numpy array is writeable (that is the passed variable will be transparently carried out directly on the ``numpy.ndarray``. -This means you can can write code such as the following and have it work as +This means you can write code such as the following and have it work as expected: .. code-block:: cpp diff --git a/ext/pybind11/docs/advanced/cast/overview.rst b/ext/pybind11/docs/advanced/cast/overview.rst index 6341fce6d4..011bd4c7a3 100644 --- a/ext/pybind11/docs/advanced/cast/overview.rst +++ b/ext/pybind11/docs/advanced/cast/overview.rst @@ -75,97 +75,96 @@ The following basic data types are supported out of the box (some may require an additional extension header to be included). To pass other data structures as arguments and return values, refer to the section on binding :ref:`classes`. 
-+------------------------------------+---------------------------+-------------------------------+ -| Data type | Description | Header file | -+====================================+===========================+===============================+ -| ``int8_t``, ``uint8_t`` | 8-bit integers | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``int16_t``, ``uint16_t`` | 16-bit integers | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``int32_t``, ``uint32_t`` | 32-bit integers | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``int64_t``, ``uint64_t`` | 64-bit integers | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``ssize_t``, ``size_t`` | Platform-dependent size | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``float``, ``double`` | Floating point types | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``bool`` | Two-state Boolean type | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``char`` | Character literal | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``char16_t`` | UTF-16 character literal | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``char32_t`` | UTF-32 character literal | :file:`pybind11/pybind11.h` | 
-+------------------------------------+---------------------------+-------------------------------+ -| ``wchar_t`` | Wide character literal | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``const char *`` | UTF-8 string literal | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``const char16_t *`` | UTF-16 string literal | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``const char32_t *`` | UTF-32 string literal | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``const wchar_t *`` | Wide string literal | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::string`` | STL dynamic UTF-8 string | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::u16string`` | STL dynamic UTF-16 string | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::u32string`` | STL dynamic UTF-32 string | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::wstring`` | STL dynamic wide string | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::string_view``, | STL C++17 string views | :file:`pybind11/pybind11.h` | -| ``std::u16string_view``, etc. 
| | | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::pair`` | Pair of two custom types | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::tuple<...>`` | Arbitrary tuple of types | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::reference_wrapper<...>`` | Reference type wrapper | :file:`pybind11/pybind11.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::complex`` | Complex numbers | :file:`pybind11/complex.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::array`` | STL static array | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::vector`` | STL dynamic array | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::deque`` | STL double-ended queue | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::valarray`` | STL value array | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::list`` | STL linked list | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::map`` | STL ordered map | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::unordered_map`` | STL unordered map | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| 
``std::set`` | STL ordered set | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::unordered_set`` | STL unordered set | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::optional`` | STL optional type (C++17) | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::experimental::optional`` | STL optional type (exp.) | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::variant<...>`` | Type-safe union (C++17) | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::filesystem::path`` | STL path (C++17) [#]_ | :file:`pybind11/stl.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::function<...>`` | STL polymorphic function | :file:`pybind11/functional.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::chrono::duration<...>`` | STL time duration | :file:`pybind11/chrono.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``std::chrono::time_point<...>`` | STL date/time | :file:`pybind11/chrono.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``Eigen::Matrix<...>`` | Eigen: dense matrix | :file:`pybind11/eigen.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``Eigen::Map<...>`` | Eigen: mapped memory | :file:`pybind11/eigen.h` | -+------------------------------------+---------------------------+-------------------------------+ -| ``Eigen::SparseMatrix<...>`` | Eigen: 
sparse matrix | :file:`pybind11/eigen.h` | -+------------------------------------+---------------------------+-------------------------------+ ++------------------------------------+---------------------------+-----------------------------------+ +| Data type | Description | Header file | ++====================================+===========================+===================================+ +| ``int8_t``, ``uint8_t`` | 8-bit integers | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``int16_t``, ``uint16_t`` | 16-bit integers | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``int32_t``, ``uint32_t`` | 32-bit integers | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``int64_t``, ``uint64_t`` | 64-bit integers | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``ssize_t``, ``size_t`` | Platform-dependent size | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``float``, ``double`` | Floating point types | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``bool`` | Two-state Boolean type | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``char`` | Character literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``char16_t`` | UTF-16 character literal | :file:`pybind11/pybind11.h` | 
++------------------------------------+---------------------------+-----------------------------------+ +| ``char32_t`` | UTF-32 character literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``wchar_t`` | Wide character literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``const char *`` | UTF-8 string literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``const char16_t *`` | UTF-16 string literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``const char32_t *`` | UTF-32 string literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``const wchar_t *`` | Wide string literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::string`` | STL dynamic UTF-8 string | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::u16string`` | STL dynamic UTF-16 string | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::u32string`` | STL dynamic UTF-32 string | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::wstring`` | STL dynamic wide string | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::string_view``, | STL C++17 string views | 
:file:`pybind11/pybind11.h` | +| ``std::u16string_view``, etc. | | | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::pair`` | Pair of two custom types | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::tuple<...>`` | Arbitrary tuple of types | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::reference_wrapper<...>`` | Reference type wrapper | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::complex`` | Complex numbers | :file:`pybind11/complex.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::array`` | STL static array | :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::vector`` | STL dynamic array | :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::deque`` | STL double-ended queue | :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::valarray`` | STL value array | :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::list`` | STL linked list | :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::map`` | STL ordered map | :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::unordered_map`` | STL unordered map | :file:`pybind11/stl.h` | 
++------------------------------------+---------------------------+-----------------------------------+ +| ``std::set`` | STL ordered set | :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::unordered_set`` | STL unordered set | :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::optional`` | STL optional type (C++17) | :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::experimental::optional`` | STL optional type (exp.) | :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::variant<...>`` | Type-safe union (C++17) | :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::filesystem::path`` | STL path (C++17) [#]_ | :file:`pybind11/stl/filesystem.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::function<...>`` | STL polymorphic function | :file:`pybind11/functional.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::chrono::duration<...>`` | STL time duration | :file:`pybind11/chrono.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::chrono::time_point<...>`` | STL date/time | :file:`pybind11/chrono.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``Eigen::Matrix<...>`` | Eigen: dense matrix | :file:`pybind11/eigen.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``Eigen::Map<...>`` | Eigen: mapped memory | 
:file:`pybind11/eigen.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``Eigen::SparseMatrix<...>`` | Eigen: sparse matrix | :file:`pybind11/eigen.h` | ++------------------------------------+---------------------------+-----------------------------------+ .. [#] ``std::filesystem::path`` is converted to ``pathlib.Path`` and - ``os.PathLike`` is converted to ``std::filesystem::path``, but this requires - Python 3.6 (for ``__fspath__`` support). + ``os.PathLike`` is converted to ``std::filesystem::path``. diff --git a/ext/pybind11/docs/advanced/cast/stl.rst b/ext/pybind11/docs/advanced/cast/stl.rst index b8622ee095..03d49b2950 100644 --- a/ext/pybind11/docs/advanced/cast/stl.rst +++ b/ext/pybind11/docs/advanced/cast/stl.rst @@ -42,7 +42,7 @@ types: .. code-block:: cpp // `boost::optional` as an example -- can be any `std::optional`-like container - namespace pybind11 { namespace detail { + namespace PYBIND11_NAMESPACE { namespace detail { template struct type_caster> : optional_caster> {}; }} @@ -54,7 +54,7 @@ for custom variant types: .. code-block:: cpp // `boost::variant` as an example -- can be any `std::variant`-like container - namespace pybind11 { namespace detail { + namespace PYBIND11_NAMESPACE { namespace detail { template struct type_caster> : variant_caster> {}; @@ -66,7 +66,7 @@ for custom variant types: return boost::apply_visitor(args...); } }; - }} // namespace pybind11::detail + }} // namespace PYBIND11_NAMESPACE::detail The ``visit_helper`` specialization is not required if your ``name::variant`` provides a ``name::visit()`` function. For any other function name, the specialization must be @@ -87,8 +87,6 @@ included to tell pybind11 how to visit the variant. pybind11 only supports the modern implementation of ``boost::variant`` which makes use of variadic templates. This requires Boost 1.56 or newer. 
- Additionally, on Windows, MSVC 2017 is required because ``boost::variant`` - falls back to the old non-variadic implementation on MSVC 2015. .. _opaque: diff --git a/ext/pybind11/docs/advanced/cast/strings.rst b/ext/pybind11/docs/advanced/cast/strings.rst index cfd7e7b7a5..e246c5219a 100644 --- a/ext/pybind11/docs/advanced/cast/strings.rst +++ b/ext/pybind11/docs/advanced/cast/strings.rst @@ -1,14 +1,6 @@ Strings, bytes and Unicode conversions ###################################### -.. note:: - - This section discusses string handling in terms of Python 3 strings. For - Python 2.7, replace all occurrences of ``str`` with ``unicode`` and - ``bytes`` with ``str``. Python 2.7 users may find it best to use ``from - __future__ import unicode_literals`` to avoid unintentionally using ``str`` - instead of ``unicode``. - Passing Python strings to C++ ============================= @@ -58,9 +50,9 @@ Passing bytes to C++ -------------------- A Python ``bytes`` object will be passed to C++ functions that accept -``std::string`` or ``char*`` *without* conversion. On Python 3, in order to -make a function *only* accept ``bytes`` (and not ``str``), declare it as taking -a ``py::bytes`` argument. +``std::string`` or ``char*`` *without* conversion. In order to make a function +*only* accept ``bytes`` (and not ``str``), declare it as taking a ``py::bytes`` +argument. Returning C++ strings to Python @@ -204,11 +196,6 @@ decoded to Python ``str``. } ); -.. warning:: - - Wide character strings may not work as described on Python 2.7 or Python - 3.3 compiled with ``--enable-unicode=ucs2``. - Strings in multibyte encodings such as Shift-JIS must transcoded to a UTF-8/16/32 before being returned to Python. 
diff --git a/ext/pybind11/docs/advanced/classes.rst b/ext/pybind11/docs/advanced/classes.rst index 6330af5eb4..01a490b721 100644 --- a/ext/pybind11/docs/advanced/classes.rst +++ b/ext/pybind11/docs/advanced/classes.rst @@ -9,7 +9,7 @@ that you are already familiar with the basics from :doc:`/classes`. Overriding virtual functions in Python ====================================== -Suppose that a C++ class or interface has a virtual function that we'd like to +Suppose that a C++ class or interface has a virtual function that we'd like to override from within Python (we'll focus on the class ``Animal``; ``Dog`` is given as a specific example of how one would do this with traditional C++ code). @@ -133,14 +133,14 @@ a virtual method call. >>> from example import * >>> d = Dog() >>> call_go(d) - u'woof! woof! woof! ' + 'woof! woof! woof! ' >>> class Cat(Animal): ... def go(self, n_times): ... return "meow! " * n_times ... >>> c = Cat() >>> call_go(c) - u'meow! meow! meow! ' + 'meow! meow! meow! ' If you are defining a custom constructor in a derived Python class, you *must* ensure that you explicitly call the bound C++ constructor using ``__init__``, @@ -813,26 +813,21 @@ An instance can now be pickled as follows: .. code-block:: python - try: - import cPickle as pickle # Use cPickle on Python 2.7 - except ImportError: - import pickle + import pickle p = Pickleable("test_value") p.setExtra(15) - data = pickle.dumps(p, 2) + data = pickle.dumps(p) .. note:: - Note that only the cPickle module is supported on Python 2.7. - - The second argument to ``dumps`` is also crucial: it selects the pickle - protocol version 2, since the older version 1 is not supported. Newer - versions are also fine—for instance, specify ``-1`` to always use the - latest available version. Beware: failure to follow these instructions - will cause important pybind11 memory allocation routines to be skipped - during unpickling, which will likely lead to memory corruption and/or - segmentation faults. 
+ If given, the second argument to ``dumps`` must be 2 or larger - 0 and 1 are + not supported. Newer versions are also fine; for instance, specify ``-1`` to + always use the latest available version. Beware: failure to follow these + instructions will cause important pybind11 memory allocation routines to be + skipped during unpickling, which will likely lead to memory corruption + and/or segmentation faults. Python defaults to version 3 (Python 3-3.7) and + version 4 for Python 3.8+. .. seealso:: @@ -849,11 +844,9 @@ Python normally uses references in assignments. Sometimes a real copy is needed to prevent changing all copies. The ``copy`` module [#f5]_ provides these capabilities. -On Python 3, a class with pickle support is automatically also (deep)copy +A class with pickle support is automatically also (deep)copy compatible. However, performance can be improved by adding custom -``__copy__`` and ``__deepcopy__`` methods. With Python 2.7, these custom methods -are mandatory for (deep)copy compatibility, because pybind11 only supports -cPickle. +``__copy__`` and ``__deepcopy__`` methods. For simple classes (deep)copy can be enabled by using the copy constructor, which should look as follows: @@ -1125,13 +1118,6 @@ described trampoline: py::class_(m, "A") // <-- `Trampoline` here .def("foo", &Publicist::foo); // <-- `Publicist` here, not `Trampoline`! -.. note:: - - MSVC 2015 has a compiler bug (fixed in version 2017) which - requires a more explicit function binding in the form of - ``.def("foo", static_cast(&Publicist::foo));`` - where ``int (A::*)() const`` is the type of ``A::foo``. - Binding final classes ===================== @@ -1161,6 +1147,58 @@ error: .. versionadded:: 2.6 +Binding classes with template parameters +======================================== + +pybind11 can also wrap classes that have template parameters. Consider these classes: + +.. 
code-block:: cpp + + struct Cat {}; + struct Dog {}; + + template <typename PetType> + struct Cage { + Cage(PetType& pet); + PetType& get(); + }; + +C++ templates may only be instantiated at compile time, so pybind11 can only +wrap instantiated templated classes. You cannot wrap a non-instantiated template: + +.. code-block:: cpp + + // BROKEN (this will not compile) + py::class_<Cage>(m, "Cage"); + .def("get", &Cage::get); + +You must explicitly specify each template/type combination that you want to +wrap separately. + +.. code-block:: cpp + + // ok + py::class_<Cage<Cat>>(m, "CatCage") + .def("get", &Cage<Cat>::get); + + // ok + py::class_<Cage<Dog>>(m, "DogCage") + .def("get", &Cage<Dog>::get); + +If your class methods have template parameters you can wrap those as well, +but once again each instantiation must be explicitly specified: + +.. code-block:: cpp + + template <typename T> + struct MyClass { + template <typename V> + T fn(V v); + }; + + py::class_<MyClass<int>>(m, "MyClassT") + .def("fn", &MyClass<int>::fn<std::string>); + Custom automatic downcasters ============================ @@ -1190,7 +1228,7 @@ whether a downcast is safe, you can proceed by specializing the std::string bark() const { return sound; } }; - namespace pybind11 { + namespace PYBIND11_NAMESPACE { template<> struct polymorphic_type_hook<Pet> { static const void *get(const Pet *src, const std::type_info*& type) { // note that src may be nullptr @@ -1201,7 +1239,7 @@ whether a downcast is safe, you can proceed by specializing the return src; } }; - } // namespace pybind11 + } // namespace PYBIND11_NAMESPACE When pybind11 wants to convert a C++ pointer of type ``Base*`` to a Python object, it calls ``polymorphic_type_hook<Base>::get()`` to diff --git a/ext/pybind11/docs/advanced/exceptions.rst b/ext/pybind11/docs/advanced/exceptions.rst index 40f67d7b83..53981dc08f 100644 --- a/ext/pybind11/docs/advanced/exceptions.rst +++ b/ext/pybind11/docs/advanced/exceptions.rst @@ -64,7 +64,7 @@ at its exception handler. 
+--------------------------------------+--------------------------------------+ Exception translation is not bidirectional. That is, *catching* the C++ -exceptions defined above above will not trap exceptions that originate from +exceptions defined above will not trap exceptions that originate from Python. For that, catch :class:`pybind11::error_already_set`. See :ref:`below ` for further details. @@ -177,9 +177,12 @@ section. may be explicitly (re-)thrown to delegate it to the other, previously-declared existing exception translators. - Note that ``libc++`` and ``libstdc++`` `behave differently `_ - with ``-fvisibility=hidden``. Therefore exceptions that are used across ABI boundaries need to be explicitly exported, as exercised in ``tests/test_exceptions.h``. - See also: "Problems with C++ exceptions" under `GCC Wiki `_. + Note that ``libc++`` and ``libstdc++`` `behave differently under macOS + `_ + with ``-fvisibility=hidden``. Therefore exceptions that are used across ABI + boundaries need to be explicitly exported, as exercised in + ``tests/test_exceptions.h``. See also: + "Problems with C++ exceptions" under `GCC Wiki `_. Local vs Global Exception Translators @@ -328,8 +331,8 @@ an invalid state. Chaining exceptions ('raise from') ================================== -In Python 3.3 a mechanism for indicating that exceptions were caused by other -exceptions was introduced: +Python has a mechanism for indicating that exceptions were caused by other +exceptions: .. code-block:: py @@ -340,7 +343,7 @@ exceptions was introduced: To do a similar thing in pybind11, you can use the ``py::raise_from`` function. It sets the current python error indicator, so to continue propagating the exception -you should ``throw py::error_already_set()`` (Python 3 only). +you should ``throw py::error_already_set()``. .. 
code-block:: cpp diff --git a/ext/pybind11/docs/advanced/functions.rst b/ext/pybind11/docs/advanced/functions.rst index abd1084ab5..69e3d8a1df 100644 --- a/ext/pybind11/docs/advanced/functions.rst +++ b/ext/pybind11/docs/advanced/functions.rst @@ -120,7 +120,7 @@ targeted arguments can be passed through the :class:`cpp_function` constructor: .. code-block:: cpp class_(m, "MyClass") - .def_property("data" + .def_property("data", py::cpp_function(&MyClass::getData, py::return_value_policy::copy), py::cpp_function(&MyClass::setData) ); @@ -306,8 +306,9 @@ The class ``py::args`` derives from ``py::tuple`` and ``py::kwargs`` derives from ``py::dict``. You may also use just one or the other, and may combine these with other -arguments as long as the ``py::args`` and ``py::kwargs`` arguments are the last -arguments accepted by the function. +arguments. Note, however, that ``py::kwargs`` must always be the last argument +of the function, and ``py::args`` implies that any further arguments are +keyword-only (see :ref:`keyword_only_arguments`). Please refer to the other examples for details on how to iterate over these, and on how to cast their entries into C++ objects. A demonstration is also @@ -366,10 +367,12 @@ like so: py::class_("MyClass") .def("myFunction", py::arg("arg") = static_cast(nullptr)); +.. _keyword_only_arguments: + Keyword-only arguments ====================== -Python 3 introduced keyword-only arguments by specifying an unnamed ``*`` +Python implements keyword-only arguments by specifying an unnamed ``*`` argument in a function definition: .. code-block:: python @@ -392,11 +395,19 @@ argument annotations when registering the function: m.def("f", [](int a, int b) { /* ... */ }, py::arg("a"), py::kw_only(), py::arg("b")); -Note that you currently cannot combine this with a ``py::args`` argument. This -feature does *not* require Python 3 to work. - .. 
versionadded:: 2.6 +A ``py::args`` argument implies that any following arguments are keyword-only, +as if ``py::kw_only()`` had been specified in the same relative location of the +argument list as the ``py::args`` argument. The ``py::kw_only()`` may be +included to be explicit about this, but is not required. + +.. versionchanged:: 2.9 + This can now be combined with ``py::args``. Before, ``py::args`` could only + occur at the end of the argument list, or immediately before a ``py::kwargs`` + argument at the end. + + Positional-only arguments ========================= @@ -566,3 +577,38 @@ prefers earlier-defined overloads to later-defined ones. .. versionadded:: 2.6 The ``py::prepend()`` tag. + +Binding functions with template parameters +========================================== + +You can bind functions that have template parameters. Here's a function: + +.. code-block:: cpp + + template <typename T> + void set(T t); + +C++ templates cannot be instantiated at runtime, so you cannot bind the +non-instantiated function: + +.. code-block:: cpp + + // BROKEN (this will not compile) + m.def("set", &set); + +You must bind each instantiated function template separately. You may bind +each instantiation with the same name, which will be treated the same as +an overloaded function: + +.. code-block:: cpp + + m.def("set", &set<int>); + m.def("set", &set<std::string>); + +Sometimes it's more clear to bind them with separate names, which is also +an option: + +.. code-block:: cpp + + m.def("setInt", &set<int>); + m.def("setString", &set<std::string>); diff --git a/ext/pybind11/docs/advanced/misc.rst b/ext/pybind11/docs/advanced/misc.rst index edab15fcb7..805ec838fc 100644 --- a/ext/pybind11/docs/advanced/misc.rst +++ b/ext/pybind11/docs/advanced/misc.rst @@ -39,15 +39,42 @@ The ``PYBIND11_MAKE_OPAQUE`` macro does *not* require the above workarounds. Global Interpreter Lock (GIL) ============================= -When calling a C++ function from Python, the GIL is always held. 
+The Python C API dictates that the Global Interpreter Lock (GIL) must always +be held by the current thread to safely access Python objects. As a result, +when Python calls into C++ via pybind11 the GIL must be held, and pybind11 +will never implicitly release the GIL. + +.. code-block:: cpp + + void my_function() { + /* GIL is held when this function is called from Python */ + } + + PYBIND11_MODULE(example, m) { + m.def("my_function", &my_function); + } + +pybind11 will ensure that the GIL is held when it knows that it is calling +Python code. For example, if a Python callback is passed to C++ code via +``std::function``, when C++ code calls the function the built-in wrapper +will acquire the GIL before calling the Python callback. Similarly, the +``PYBIND11_OVERRIDE`` family of macros will acquire the GIL before calling +back into Python. + +When writing C++ code that is called from other C++ code, if that code accesses +Python state, it must explicitly acquire and release the GIL. + The classes :class:`gil_scoped_release` and :class:`gil_scoped_acquire` can be used to acquire and release the global interpreter lock in the body of a C++ function call. In this way, long-running C++ code can be parallelized using -multiple Python threads. Taking :ref:`overriding_virtuals` as an example, this +multiple Python threads, **but great care must be taken** when any +:class:`gil_scoped_release` appear: if there is any way that the C++ code +can access Python objects, :class:`gil_scoped_acquire` should be used to +reacquire the GIL. Taking :ref:`overriding_virtuals` as an example, this could be realized as follows (important changes highlighted): .. 
code-block:: cpp - :emphasize-lines: 8,9,31,32 + :emphasize-lines: 8,30,31 class PyAnimal : public Animal { public: @@ -56,9 +83,7 @@ could be realized as follows (important changes highlighted): /* Trampoline (need one for each virtual function) */ std::string go(int n_times) { - /* Acquire GIL before calling Python code */ - py::gil_scoped_acquire acquire; - + /* PYBIND11_OVERRIDE_PURE will acquire the GIL before accessing Python state */ PYBIND11_OVERRIDE_PURE( std::string, /* Return type */ Animal, /* Parent class */ @@ -78,7 +103,8 @@ could be realized as follows (important changes highlighted): .def(py::init<>()); m.def("call_go", [](Animal *animal) -> std::string { - /* Release GIL before calling into (potentially long-running) C++ code */ + // GIL is held when called from Python code. Release GIL before + // calling into (potentially long-running) C++ code py::gil_scoped_release release; return call_go(animal); }); @@ -92,6 +118,34 @@ The ``call_go`` wrapper can also be simplified using the ``call_guard`` policy m.def("call_go", &call_go, py::call_guard()); +Common Sources Of Global Interpreter Lock Errors +================================================================== + +Failing to properly hold the Global Interpreter Lock (GIL) is one of the +more common sources of bugs within code that uses pybind11. If you are +running into GIL related errors, we highly recommend you consult the +following checklist. + +- Do you have any global variables that are pybind11 objects or invoke + pybind11 functions in either their constructor or destructor? You are generally + not allowed to invoke any Python function in a global static context. We recommend + using lazy initialization and then intentionally leaking at the end of the program. + +- Do you have any pybind11 objects that are members of other C++ structures? One + commonly overlooked requirement is that pybind11 objects have to increase their reference count + whenever their copy constructor is called. 
Thus, you need to be holding the GIL to invoke + the copy constructor of any C++ class that has a pybind11 member. This can sometimes be very + tricky to track for complicated programs. Think carefully when you make a pybind11 object + a member in another struct. + +- C++ destructors that invoke Python functions can be particularly troublesome as + destructors can sometimes get invoked in weird and unexpected circumstances as a result + of exceptions. + +- You should try running your code in a debug build. That will enable additional assertions + within pybind11 that will throw exceptions on certain GIL handling errors + (reference counting operations). + Binding sequence data types, iterators, the slicing protocol, etc. ================================================================== @@ -298,6 +352,15 @@ The class ``options`` allows you to selectively suppress auto-generated signatur m.def("add", [](int a, int b) { return a + b; }, "A function which adds two numbers"); } +pybind11 also appends all members of an enum to the resulting enum docstring. +This default behavior can be disabled by using the ``disable_enum_members_docstring()`` +function of the ``options`` class. + +With ``disable_user_defined_docstrings()`` all user defined docstrings of +``module_::def()``, ``class_::def()`` and ``enum_()`` are disabled, but the +function signatures and enum members are included in the docstring, unless they +are disabled separately. + Note that changes to the settings affect only function bindings created during the lifetime of the ``options`` instance. When it goes out of scope at the end of the module's init function, the default settings are restored to prevent unwanted side effects. diff --git a/ext/pybind11/docs/advanced/pycpp/numpy.rst b/ext/pybind11/docs/advanced/pycpp/numpy.rst index 30daeefff9..07c969305d 100644 --- a/ext/pybind11/docs/advanced/pycpp/numpy.rst +++ b/ext/pybind11/docs/advanced/pycpp/numpy.rst @@ -87,7 +87,7 @@ buffer objects (e.g. 
a NumPy matrix). /* Request a buffer descriptor from Python */ py::buffer_info info = b.request(); - /* Some sanity checks ... */ + /* Some basic validation checks ... */ if (info.format != py::format_descriptor::format()) throw std::runtime_error("Incompatible format: expected a double array!"); @@ -395,11 +395,9 @@ uses of ``py::array``: Ellipsis ======== -Python 3 provides a convenient ``...`` ellipsis notation that is often used to +Python provides a convenient ``...`` ellipsis notation that is often used to slice multidimensional arrays. For instance, the following snippet extracts the middle dimensions of a tensor with the first and last index set to zero. -In Python 2, the syntactic sugar ``...`` is not available, but the singleton -``Ellipsis`` (of type ``ellipsis``) can still be used directly. .. code-block:: python @@ -414,8 +412,6 @@ operation on the C++ side: py::array a = /* A NumPy array */; py::array b = a[py::make_tuple(0, py::ellipsis(), 0)]; -.. versionchanged:: 2.6 - ``py::ellipsis()`` is now also available in Python 2. Memory view =========== @@ -437,7 +433,7 @@ following: { 2, 4 }, // shape (rows, cols) { sizeof(uint8_t) * 4, sizeof(uint8_t) } // strides in bytes ); - }) + }); This approach is meant for providing a ``memoryview`` for a C/C++ buffer not managed by Python. The user is responsible for managing the lifetime of the @@ -453,11 +449,7 @@ We can also use ``memoryview::from_memory`` for a simple 1D contiguous buffer: buffer, // buffer pointer sizeof(uint8_t) * 8 // buffer size ); - }) - -.. note:: - - ``memoryview::from_memory`` is not available in Python 2. + }); .. versionchanged:: 2.6 ``memoryview::from_memory`` added. 
diff --git a/ext/pybind11/docs/advanced/smart_ptrs.rst b/ext/pybind11/docs/advanced/smart_ptrs.rst index 5a22201095..3c40ce1237 100644 --- a/ext/pybind11/docs/advanced/smart_ptrs.rst +++ b/ext/pybind11/docs/advanced/smart_ptrs.rst @@ -157,7 +157,7 @@ specialized: PYBIND11_DECLARE_HOLDER_TYPE(T, SmartPtr); // Only needed if the type's `.get()` goes by another name - namespace pybind11 { namespace detail { + namespace PYBIND11_NAMESPACE { namespace detail { template struct holder_helper> { // <-- specialization static const T *get(const SmartPtr &p) { return p.getPointer(); } diff --git a/ext/pybind11/docs/basics.rst b/ext/pybind11/docs/basics.rst index 0b1d85cfd3..e9b24c7fa7 100644 --- a/ext/pybind11/docs/basics.rst +++ b/ext/pybind11/docs/basics.rst @@ -32,8 +32,7 @@ The last line will both compile and run the tests. Windows ------- -On Windows, only **Visual Studio 2015** and newer are supported since pybind11 relies -on various C++11 language features that break older versions of Visual Studio. +On Windows, only **Visual Studio 2017** and newer are supported. .. Note:: @@ -109,7 +108,7 @@ a file named :file:`example.cpp` with the following contents: PYBIND11_MODULE(example, m) { m.doc() = "pybind11 example plugin"; // optional module docstring - m.def("add", &add, "A function which adds two numbers"); + m.def("add", &add, "A function that adds two numbers"); } .. [#f1] In practice, implementation and binding code will generally be located @@ -166,12 +165,12 @@ load and execute the example: .. code-block:: pycon $ python - Python 2.7.10 (default, Aug 22 2015, 20:33:39) - [GCC 4.2.1 Compatible Apple LLVM 7.0.0 (clang-700.0.59.1)] on darwin + Python 3.9.10 (main, Jan 15 2022, 11:48:04) + [Clang 13.0.0 (clang-1300.0.29.3)] on darwin Type "help", "copyright", "credits" or "license" for more information. >>> import example >>> example.add(1, 2) - 3L + 3 >>> .. 
_keyword_args: diff --git a/ext/pybind11/docs/benchmark.py b/ext/pybind11/docs/benchmark.py index f190793671..2150b6ca78 100644 --- a/ext/pybind11/docs/benchmark.py +++ b/ext/pybind11/docs/benchmark.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import datetime as dt import os import random @@ -12,20 +11,20 @@ def generate_dummy_code_pybind11(nclasses=10): bindings = "" for cl in range(nclasses): - decl += "class cl%03i;\n" % cl + decl += f"class cl{cl:03};\n" decl += "\n" for cl in range(nclasses): - decl += "class cl%03i {\n" % cl + decl += f"class {cl:03} {{\n" decl += "public:\n" - bindings += ' py::class_(m, "cl%03i")\n' % (cl, cl) + bindings += f' py::class_(m, "cl{cl:03}")\n' for fn in range(nfns): ret = random.randint(0, nclasses - 1) params = [random.randint(0, nclasses - 1) for i in range(nargs)] - decl += " cl%03i *fn_%03i(" % (ret, fn) - decl += ", ".join("cl%03i *" % p for p in params) + decl += f" cl{ret:03} *fn_{fn:03}(" + decl += ", ".join(f"cl{p:03} *" for p in params) decl += ");\n" - bindings += ' .def("fn_%03i", &cl%03i::fn_%03i)\n' % (fn, cl, fn) + bindings += f' .def("fn_{fn:03}", &cl{cl:03}::fn_{fn:03})\n' decl += "};\n\n" bindings += " ;\n" @@ -43,23 +42,20 @@ def generate_dummy_code_boost(nclasses=10): bindings = "" for cl in range(nclasses): - decl += "class cl%03i;\n" % cl + decl += f"class cl{cl:03};\n" decl += "\n" for cl in range(nclasses): decl += "class cl%03i {\n" % cl decl += "public:\n" - bindings += ' py::class_("cl%03i")\n' % (cl, cl) + bindings += f' py::class_("cl{cl:03}")\n' for fn in range(nfns): ret = random.randint(0, nclasses - 1) params = [random.randint(0, nclasses - 1) for i in range(nargs)] - decl += " cl%03i *fn_%03i(" % (ret, fn) - decl += ", ".join("cl%03i *" % p for p in params) + decl += f" cl{ret:03} *fn_{fn:03}(" + decl += ", ".join(f"cl{p:03} *" for p in params) decl += ");\n" - bindings += ( - ' .def("fn_%03i", &cl%03i::fn_%03i, py::return_value_policy())\n' - % (fn, cl, fn) - ) + bindings += f' 
.def("fn_{fn:03}", &cl{cl:03}::fn_{fn:03}, py::return_value_policy())\n' decl += "};\n\n" bindings += " ;\n" @@ -75,7 +71,7 @@ def generate_dummy_code_boost(nclasses=10): for codegen in [generate_dummy_code_pybind11, generate_dummy_code_boost]: print("{") for i in range(0, 10): - nclasses = 2 ** i + nclasses = 2**i with open("test.cpp", "w") as f: f.write(codegen(nclasses)) n1 = dt.datetime.now() diff --git a/ext/pybind11/docs/changelog.rst b/ext/pybind11/docs/changelog.rst index bb5457eec1..bb111c5f26 100644 --- a/ext/pybind11/docs/changelog.rst +++ b/ext/pybind11/docs/changelog.rst @@ -6,10 +6,618 @@ Changelog Starting with version 1.8.0, pybind11 releases use a `semantic versioning `_ policy. +Changes will be added here periodically from the "Suggested changelog entry" +block in pull request descriptions. + IN DEVELOPMENT -------------- +Changes will be summarized here periodically. + +Changes: + +* ``PyGILState_Check()``'s in ``pybind11::handle``'s ``inc_ref()`` & + ``dec_ref()`` are now enabled by default again. + `#4246 `_ + +Build system improvements: + +* Update clang-tidy to 15 in CI. + `#4387 `_ + + +Version 2.10.3 (Jan 3, 2023) +---------------------------- + +Changes: + +* Temporarily made our GIL status assertions (added in 2.10.2) disabled by + default (re-enable manually by defining + ``PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF``, will be enabled in 2.11). + `#4432 `_ + +* Improved error messages when ``inc_ref``/``dec_ref`` are called with an + invalid GIL state. + `#4427 `_ + `#4436 `_ + +Bug Fixes: + +* Some minor touchups found by static analyzers. + `#4440 `_ + + +Version 2.10.2 (Dec 20, 2022) +----------------------------- + +Changes: + +* ``scoped_interpreter`` constructor taking ``PyConfig``. + `#4330 `_ + +* ``pybind11/eigen/tensor.h`` adds converters to and from ``Eigen::Tensor`` and + ``Eigen::TensorMap``. + `#4201 `_ + +* ``PyGILState_Check()``'s were integrated to ``pybind11::handle`` + ``inc_ref()`` & ``dec_ref()``. 
The added GIL checks are guarded by + ``PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF``, which is the default only if + ``NDEBUG`` is not defined. (Made non-default in 2.10.3, will be active in 2.11) + `#4246 `_ + +* Add option for enable/disable enum members in docstring. + `#2768 `_ + +* Fixed typing of ``KeysView``, ``ValuesView`` and ``ItemsView`` in ``bind_map``. + `#4353 `_ + +Bug fixes: + +* Bug fix affecting only Python 3.6 under very specific, uncommon conditions: + move ``PyEval_InitThreads()`` call to the correct location. + `#4350 `_ + +* Fix segfault bug when passing foreign native functions to functional.h. + `#4254 `_ + +Build system improvements: + +* Support setting PYTHON_LIBRARIES manually for Windows ARM cross-compilation + (classic mode). + `#4406 `_ + +* Extend IPO/LTO detection for ICX (a.k.a IntelLLVM) compiler. + `#4402 `_ + +* Allow calling ``find_package(pybind11 CONFIG)`` multiple times from separate + directories in the same CMake project and properly link Python (new mode). + `#4401 `_ + +* ``multiprocessing_set_spawn`` in pytest fixture for added safety. + `#4377 `_ + +* Fixed a bug in two pybind11/tools cmake scripts causing "Unknown arguments specified" errors. + `#4327 `_ + + + +Version 2.10.1 (Oct 31, 2022) +----------------------------- + +This is the first version to fully support embedding the newly released Python 3.11. + +Changes: + +* Allow ``pybind11::capsule`` constructor to take null destructor pointers. + `#4221 `_ + +* ``embed.h`` was changed so that ``PYTHONPATH`` is used also with Python 3.11 + (established behavior). + `#4119 `_ + +* A ``PYBIND11_SIMPLE_GIL_MANAGEMENT`` option was added (cmake, C++ define), + along with many additional tests in ``test_gil_scoped.py``. The option may be + useful to try when debugging GIL-related issues, to determine if the more + complex default implementation is or is not to blame. See #4216 for + background. 
WARNING: Please be careful to not create ODR violations when + using the option: everything that is linked together with mutual symbol + visibility needs to be rebuilt. + `#4216 `_ + +* ``PYBIND11_EXPORT_EXCEPTION`` was made non-empty only under macOS. This makes + Linux builds safer, and enables the removal of warning suppression pragmas for + Windows. + `#4298 `_ + +Bug fixes: + +* Fixed a bug where ``UnicodeDecodeError`` was not propagated from various + ``py::str`` ctors when decoding surrogate utf characters. + `#4294 `_ + +* Revert perfect forwarding for ``make_iterator``. This broke at least one + valid use case. May revisit later. + `#4234 `_ + +* Fix support for safe casts to ``void*`` (regression in 2.10.0). + `#4275 `_ + +* Fix ``char8_t`` support (regression in 2.9). + `#4278 `_ + +* Unicode surrogate character in Python exception message leads to process + termination in ``error_already_set::what()``. + `#4297 `_ + +* Fix MSVC 2019 v.1924 & C++14 mode error for ``overload_cast``. + `#4188 `_ + +* Make augmented assignment operators non-const for the object-api. Behavior + was previously broken for augmented assignment operators. + `#4065 `_ + +* Add proper error checking to C++ bindings for Python list append and insert. + `#4208 `_ + +* Work-around for Nvidia's CUDA nvcc compiler in versions 11.4.0 - 11.8.0. + `#4220 `_ + +* A workaround for PyPy was added in the ``py::error_already_set`` + implementation, related to PR `#1895 `_ + released with v2.10.0. + `#4079 `_ + +* Fixed compiler errors when C++23 ``std::forward_like`` is available. + `#4136 `_ + +* Properly raise exceptions in contains methods (like when an object in unhashable). + `#4209 `_ + +* Further improve another error in exception handling. + `#4232 `_ + +* ``get_local_internals()`` was made compatible with + ``finalize_interpreter()``, fixing potential freezes during interpreter + finalization. 
+ `#4192 `_ + +Performance and style: + +* Reserve space in set and STL map casters if possible. This will prevent + unnecessary rehashing / resizing by knowing the number of keys ahead of time + for Python to C++ casting. This improvement will greatly speed up the casting + of large unordered maps and sets. + `#4194 `_ + +* GIL RAII scopes are non-copyable to avoid potential bugs. + `#4183 `_ + +* Explicitly default all relevant ctors for pytypes in the ``PYBIND11_OBJECT`` + macros and enforce the clang-tidy checks ``modernize-use-equals-default`` in + macros as well. + `#4017 `_ + +* Optimize iterator advancement in C++ bindings. + `#4237 `_ + +* Use the modern ``PyObject_GenericGetDict`` and ``PyObject_GenericSetDict`` + for handling dynamic attribute dictionaries. + `#4106 `_ + +* Document that users should use ``PYBIND11_NAMESPACE`` instead of using ``pybind11`` when + opening namespaces. Using namespace declarations and namespace qualification + remain the same as ``pybind11``. This is done to ensure consistent symbol + visibility. + `#4098 `_ + +* Mark ``detail::forward_like`` as constexpr. + `#4147 `_ + +* Optimize unpacking_collector when processing ``arg_v`` arguments. + `#4219 `_ + +* Optimize casting C++ object to ``None``. + `#4269 `_ + + +Build system improvements: + +* CMake: revert overwrite behavior, now opt-in with ``PYBIND11_PYTHONLIBS_OVERRWRITE OFF``. + `#4195 `_ + +* Include a pkg-config file when installing pybind11, such as in the Python + package. + `#4077 `_ + +* Avoid stripping debug symbols when ``CMAKE_BUILD_TYPE`` is set to ``DEBUG`` + instead of ``Debug``. + `#4078 `_ + +* Followup to `#3948 `_, fixing vcpkg again. + `#4123 `_ + +Version 2.10.0 (Jul 15, 2022) +----------------------------- + +Removed support for Python 2.7, Python 3.5, and MSVC 2015. Support for MSVC +2017 is limited due to availability of CI runners; we highly recommend MSVC +2019 or 2022 be used. Initial support added for Python 3.11. 
+ +New features: + +* ``py::anyset`` & ``py::frozenset`` were added, with copying (cast) to + ``std::set`` (similar to ``set``). + `#3901 `_ + +* Support bytearray casting to string. + `#3707 `_ + +* ``type_caster`` was added. ``std::monostate`` is a tag type + that allows ``std::variant`` to act as an optional, or allows default + construction of a ``std::variant`` holding a non-default constructible type. + `#3818 `_ + +* ``pybind11::capsule::set_name`` added to mutate the name of the capsule instance. + `#3866 `_ + +* NumPy: dtype constructor from type number added, accessors corresponding to + Python API ``dtype.num``, ``dtype.byteorder``, ``dtype.flags`` and + ``dtype.alignment`` added. + `#3868 `_ + + +Changes: + +* Python 3.6 is now the minimum supported version. + `#3688 `_ + `#3719 `_ + +* The minimum version for MSVC is now 2017. + `#3722 `_ + +* Fix issues with CPython 3.11 betas and add to supported test matrix. + `#3923 `_ + +* ``error_already_set`` is now safer and more performant, especially for + exceptions with long tracebacks, by delaying computation. + `#1895 `_ + +* Improve exception handling in python ``str`` bindings. + `#3826 `_ + +* The bindings for capsules now have more consistent exception handling. + `#3825 `_ + +* ``PYBIND11_OBJECT_CVT`` and ``PYBIND11_OBJECT_CVT_DEFAULT`` macro can now be + used to define classes in namespaces other than pybind11. + `#3797 `_ + +* Error printing code now uses ``PYBIND11_DETAILED_ERROR_MESSAGES`` instead of + requiring ``NDEBUG``, allowing use with release builds if desired. + `#3913 `_ + +* Implicit conversion of the literal ``0`` to ``pybind11::handle`` is now disabled. + `#4008 `_ + + +Bug fixes: + +* Fix exception handling when ``pybind11::weakref()`` fails. + `#3739 `_ + +* ``module_::def_submodule`` was missing proper error handling. This is fixed now. 
+ `#3973 `_ + +* The behavior of ``error_already_set`` was made safer and the highly opaque + "Unknown internal error occurred" message was replaced with a more helpful + message. + `#3982 `_ + +* ``error_already_set::what()`` now handles non-normalized exceptions correctly. + `#3971 `_ + +* Support older C++ compilers where filesystem is not yet part of the standard + library and is instead included in ``std::experimental::filesystem``. + `#3840 `_ + +* Fix ``-Wfree-nonheap-object`` warnings produced by GCC by avoiding returning + pointers to static objects with ``return_value_policy::take_ownership``. + `#3946 `_ + +* Fix cast from pytype rvalue to another pytype. + `#3949 `_ + +* Ensure proper behavior when garbage collecting classes with dynamic attributes in Python >=3.9. + `#4051 `_ + +* A couple long-standing ``PYBIND11_NAMESPACE`` + ``__attribute__((visibility("hidden")))`` inconsistencies are now fixed + (affects only unusual environments). + `#4043 `_ + +* ``pybind11::detail::get_internals()`` is now resilient to in-flight Python + exceptions. + `#3981 `_ + +* Arrays with a dimension of size 0 are now properly converted to dynamic Eigen + matrices (more common in NumPy 1.23). + `#4038 `_ + +* Avoid catching unrelated errors when importing NumPy. + `#3974 `_ + +Performance and style: + +* Added an accessor overload of ``(object &&key)`` to reference steal the + object when using python types as keys. This prevents unnecessary reference + count overhead for attr, dictionary, tuple, and sequence look ups. Added + additional regression tests. Fixed a performance bug that caused accessor + assignments to potentially perform unnecessary copies. + `#3970 `_ + +* Perfect forward all args of ``make_iterator``. + `#3980 `_ + +* Avoid potential bug in pycapsule destructor by adding an ``error_guard`` to + one of the dtors. + `#3958 `_ + +* Optimize dictionary access in ``strip_padding`` for numpy. 
+ `#3994 `_ + +* ``stl_bind.h`` bindings now take slice args as a const-ref. + `#3852 `_ + +* Made slice constructor more consistent, and improve performance of some + casters by allowing reference stealing. + `#3845 `_ + +* Change numpy dtype from_args method to use const ref. + `#3878 `_ + +* Follow rule of three to ensure ``PyErr_Restore`` is called only once. + `#3872 `_ + +* Added missing perfect forwarding for ``make_iterator`` functions. + `#3860 `_ + +* Optimize c++ to python function casting by using the rvalue caster. + `#3966 `_ + +* Optimize Eigen sparse matrix casting by removing unnecessary temporary. + `#4064 `_ + +* Avoid potential implicit copy/assignment constructors causing double free in + ``strdup_gaurd``. + `#3905 `_ + +* Enable clang-tidy checks ``misc-definitions-in-headers``, + ``modernize-loop-convert``, and ``modernize-use-nullptr``. + `#3881 `_ + `#3988 `_ + + +Build system improvements: + +* CMake: Fix file extension on Windows with cp36 and cp37 using FindPython. + `#3919 `_ + +* CMake: Support multiple Python targets (such as on vcpkg). + `#3948 `_ + +* CMake: Fix issue with NVCC on Windows. + `#3947 `_ + +* CMake: Drop the bitness check on cross compiles (like targeting WebAssembly + via Emscripten). + `#3959 `_ + +* Add MSVC builds in debug mode to CI. + `#3784 `_ + +* MSVC 2022 C++20 coverage was added to GitHub Actions, including Eigen. + `#3732 `_, + `#3741 `_ + + +Backend and tidying up: + +* New theme for the documentation. + `#3109 `_ + +* Remove idioms in code comments. Use more inclusive language. + `#3809 `_ + +* ``#include `` was removed from the ``pybind11/stl.h`` header. Your + project may break if it has a transitive dependency on this include. The fix + is to "Include What You Use". + `#3928 `_ + +* Avoid ``setup.py `` usage in internal tests. + `#3734 `_ + + +Version 2.9.2 (Mar 29, 2022) +---------------------------- + +Changes: + +* Enum now has an ``__index__`` method on Python <3.8 too. 
+ `#3700 `_ + +* Local internals are now cleared after finalizing the interpreter. + `#3744 `_ + +Bug fixes: + +* Better support for Python 3.11 alphas. + `#3694 `_ + +* ``PYBIND11_TYPE_CASTER`` now uses fully qualified symbols, so it can be used + outside of ``pybind11::detail``. + `#3758 `_ + +* Some fixes for PyPy 3.9. + `#3768 `_ + +* Fixed a potential memleak in PyPy in ``get_type_override``. + `#3774 `_ + +* Fix usage of ``VISIBILITY_INLINES_HIDDEN``. + `#3721 `_ + + +Build system improvements: + +* Uses ``sysconfig`` module to determine installation locations on Python >= + 3.10, instead of ``distutils`` which has been deprecated. + `#3764 `_ + +* Support Catch 2.13.5+ (supporting GLIBC 2.34+). + `#3679 `_ + +* Fix test failures with numpy 1.22 by ignoring whitespace when comparing + ``str()`` of dtypes. + `#3682 `_ + + +Backend and tidying up: + +* clang-tidy: added ``readability-qualified-auto``, + ``readability-braces-around-statements``, + ``cppcoreguidelines-prefer-member-initializer``, + ``clang-analyzer-optin.performance.Padding``, + ``cppcoreguidelines-pro-type-static-cast-downcast``, and + ``readability-inconsistent-declaration-parameter-name``. + `#3702 `_, + `#3699 `_, + `#3716 `_, + `#3709 `_ + +* clang-format was added to the pre-commit actions, and the entire code base + automatically reformatted (after several iterations preparing for this leap). + `#3713 `_ + + +Version 2.9.1 (Feb 2, 2022) +--------------------------- + +Changes: + +* If possible, attach Python exception with ``py::raise_from`` to ``TypeError`` + when casting from C++ to Python. This will give additional info if Python + exceptions occur in the caster. Adds a test case of trying to convert a set + from C++ to Python when the hash function is not defined in Python. + `#3605 `_ + +* Add a mapping of C++11 nested exceptions to their Python exception + equivalent using ``py::raise_from``. This attaches the nested exceptions in + Python using the ``__cause__`` field. 
+ `#3608 `_ + +* Propagate Python exception traceback using ``raise_from`` if a pybind11 + function runs out of overloads. + `#3671 `_ + +* ``py::multiple_inheritance`` is now only needed when C++ bases are hidden + from pybind11. + `#3650 `_ and + `#3659 `_ + + +Bug fixes: + +* Remove a boolean cast in ``numpy.h`` that causes MSVC C4800 warnings when + compiling against Python 3.10 or newer. + `#3669 `_ + +* Render ``py::bool_`` and ``py::float_`` as ``bool`` and ``float`` + respectively. + `#3622 `_ + +Build system improvements: + +* Fix CMake extension suffix computation on Python 3.10+. + `#3663 `_ + +* Allow ``CMAKE_ARGS`` to override CMake args in pybind11's own ``setup.py``. + `#3577 `_ + +* Remove a few deprecated c-headers. + `#3610 `_ + +* More uniform handling of test targets. + `#3590 `_ + +* Add clang-tidy readability check to catch potentially swapped function args. + `#3611 `_ + + +Version 2.9.0 (Dec 28, 2021) +---------------------------- + +This is the last version to support Python 2.7 and 3.5. + +New Features: + +* Allow ``py::args`` to be followed by other arguments; the remaining arguments + are implicitly keyword-only, as if a ``py::kw_only{}`` annotation had been + used. + `#3402 `_ + +Changes: + +* Make str/bytes/memoryview more interoperable with ``std::string_view``. + `#3521 `_ + +* Replace ``_`` with ``const_name`` in internals, avoid defining ``pybind::_`` + if ``_`` defined as macro (common gettext usage) + `#3423 `_ + + +Bug fixes: + +* Fix a rare warning about extra copy in an Eigen constructor. + `#3486 `_ + +* Fix caching of the C++ overrides. + `#3465 `_ + +* Add missing ``std::forward`` calls to some ``cpp_function`` overloads. + `#3443 `_ + +* Support PyPy 7.3.7 and the PyPy3.8 beta. Test python-3.11 on PRs with the + ``python dev`` label. + `#3419 `_ + +* Replace usage of deprecated ``Eigen::MappedSparseMatrix`` with + ``Eigen::Map>`` for Eigen 3.3+. + `#3499 `_ + +* Tweaks to support Microsoft Visual Studio 2022. 
+ `#3497 `_ + +Build system improvements: + +* Nicer CMake printout and IDE organisation for pybind11's own tests. + `#3479 `_ + +* CMake: report version type as part of the version string to avoid a spurious + space in the package status message. + `#3472 `_ + +* Flags starting with ``-g`` in ``$CFLAGS`` and ``$CPPFLAGS`` are no longer + overridden by ``.Pybind11Extension``. + `#3436 `_ + +* Ensure ThreadPool is closed in ``setup_helpers``. + `#3548 `_ + +* Avoid LTO on ``mips64`` and ``ppc64le`` (reported broken). + `#3557 `_ + + v2.8.1 (Oct 27, 2021) --------------------- @@ -680,7 +1288,7 @@ Packaging / building improvements: `#2338 `_ and `#2370 `_ - * Full integration with CMake’s C++ standard system and compile features + * Full integration with CMake's C++ standard system and compile features replaces ``PYBIND11_CPP_STANDARD``. * Generated config file is now portable to different Python/compiler/CMake diff --git a/ext/pybind11/docs/classes.rst b/ext/pybind11/docs/classes.rst index 13fa8b5387..c0c53135b8 100644 --- a/ext/pybind11/docs/classes.rst +++ b/ext/pybind11/docs/classes.rst @@ -48,10 +48,10 @@ interactive Python session demonstrating this example is shown below: >>> print(p) >>> p.getName() - u'Molly' + 'Molly' >>> p.setName("Charly") >>> p.getName() - u'Charly' + 'Charly' .. seealso:: @@ -124,10 +124,10 @@ This makes it possible to write >>> p = example.Pet("Molly") >>> p.name - u'Molly' + 'Molly' >>> p.name = "Charly" >>> p.name - u'Charly' + 'Charly' Now suppose that ``Pet::name`` was a private internal variable that can only be accessed via setters and getters. @@ -282,9 +282,9 @@ expose fields and methods of both types: >>> p = example.Dog("Molly") >>> p.name - u'Molly' + 'Molly' >>> p.bark() - u'woof!' + 'woof!' The C++ classes defined above are regular non-polymorphic types with an inheritance relationship.
This is reflected in Python: @@ -332,7 +332,7 @@ will automatically recognize this: >>> type(p) PolymorphicDog # automatically downcast >>> p.bark() - u'woof!' + 'woof!' Given a pointer to a polymorphic base, pybind11 performs automatic downcasting to the actual derived type. Note that this goes beyond the usual situation in @@ -434,8 +434,7 @@ you can use ``py::detail::overload_cast_impl`` with an additional set of parenth .def("set", overload_cast_()(&Pet::set), "Set the pet's age") .def("set", overload_cast_()(&Pet::set), "Set the pet's name"); -.. [#cpp14] A compiler which supports the ``-std=c++14`` flag - or Visual Studio 2015 Update 2 and newer. +.. [#cpp14] A compiler which supports the ``-std=c++14`` flag. .. note:: @@ -483,7 +482,7 @@ The binding code for this example looks as follows: .value("Cat", Pet::Kind::Cat) .export_values(); - py::class_ attributes(pet, "Attributes") + py::class_(pet, "Attributes") .def(py::init<>()) .def_readwrite("age", &Pet::Attributes::age); diff --git a/ext/pybind11/docs/compiling.rst b/ext/pybind11/docs/compiling.rst index 75608bd576..2b543be0be 100644 --- a/ext/pybind11/docs/compiling.rst +++ b/ext/pybind11/docs/compiling.rst @@ -417,10 +417,10 @@ existing targets instead: .. code-block:: cmake - cmake_minimum_required(VERSION 3.15...3.19) + cmake_minimum_required(VERSION 3.15...3.22) project(example LANGUAGES CXX) - find_package(Python COMPONENTS Interpreter Development REQUIRED) + find_package(Python 3.6 COMPONENTS Interpreter Development REQUIRED) find_package(pybind11 CONFIG REQUIRED) # or add_subdirectory(pybind11) @@ -433,9 +433,8 @@ algorithms from the CMake invocation, with ``-DPYBIND11_FINDPYTHON=ON``. .. warning:: - If you use FindPython2 and FindPython3 to dual-target Python, use the - individual targets listed below, and avoid targets that directly include - Python parts. 
+ If you use FindPython to multi-target Python versions, use the individual + targets listed below, and avoid targets that directly include Python parts. There are `many ways to hint or force a discovery of a specific Python installation `_), @@ -462,11 +461,8 @@ available in all modes. The targets provided are: ``pybind11::headers`` Just the pybind11 headers and minimum compile requirements - ``pybind11::python2_no_register`` - Quiets the warning/error when mixing C++14 or higher and Python 2 - ``pybind11::pybind11`` - Python headers + ``pybind11::headers`` + ``pybind11::python2_no_register`` (Python 2 only) + Python headers + ``pybind11::headers`` ``pybind11::python_link_helper`` Just the "linking" part of pybind11:module @@ -475,7 +471,7 @@ available in all modes. The targets provided are: Everything for extension modules - ``pybind11::pybind11`` + ``Python::Module`` (FindPython CMake 3.15+) or ``pybind11::python_link_helper`` ``pybind11::embed`` - Everything for embedding the Python interpreter - ``pybind11::pybind11`` + ``Python::Embed`` (FindPython) or Python libs + Everything for embedding the Python interpreter - ``pybind11::pybind11`` + ``Python::Python`` (FindPython) or Python libs ``pybind11::lto`` / ``pybind11::thin_lto`` An alternative to `INTERPROCEDURAL_OPTIMIZATION` for adding link-time optimization. @@ -509,7 +505,10 @@ You can use these targets to build complex applications. 
For example, the target_link_libraries(example PRIVATE pybind11::module pybind11::lto pybind11::windows_extras) pybind11_extension(example) - pybind11_strip(example) + if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug|RelWithDebInfo) + # Strip unnecessary sections of the binary on Linux/macOS + pybind11_strip(example) + endif() set_target_properties(example PROPERTIES CXX_VISIBILITY_PRESET "hidden" CUDA_VISIBILITY_PRESET "hidden") @@ -577,21 +576,12 @@ On Linux, you can compile an example such as the one given in $ c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) example.cpp -o example$(python3-config --extension-suffix) -The flags given here assume that you're using Python 3. For Python 2, just -change the executable appropriately (to ``python`` or ``python2``). - The ``python3 -m pybind11 --includes`` command fetches the include paths for both pybind11 and Python headers. This assumes that pybind11 has been installed using ``pip`` or ``conda``. If it hasn't, you can also manually specify ``-I /include`` together with the Python includes path ``python3-config --includes``. -Note that Python 2.7 modules don't use a special suffix, so you should simply -use ``example.so`` instead of ``example$(python3-config --extension-suffix)``. -Besides, the ``--extension-suffix`` option may or may not be available, depending -on the distribution; in the latter case, the module extension can be manually -set to ``.so``. - On macOS: the build command is almost the same but it also requires passing the ``-undefined dynamic_lookup`` flag so as to ignore missing symbols when building the module: diff --git a/ext/pybind11/docs/conf.py b/ext/pybind11/docs/conf.py index 092e274e09..2da6773f4f 100644 --- a/ext/pybind11/docs/conf.py +++ b/ext/pybind11/docs/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # pybind11 documentation build configuration file, created by # sphinx-quickstart on Sun Oct 11 19:23:48 2015. 
@@ -36,6 +35,7 @@ DIR = Path(__file__).parent.resolve() # ones. extensions = [ "breathe", + "sphinx_copybutton", "sphinxcontrib.rsvgconverter", "sphinxcontrib.moderncmakedomain", ] @@ -126,23 +126,7 @@ todo_include_todos = False # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -on_rtd = os.environ.get("READTHEDOCS", None) == "True" - -if not on_rtd: # only import and set the theme if we're building docs locally - import sphinx_rtd_theme - - html_theme = "sphinx_rtd_theme" - html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - - html_context = {"css_files": ["_static/theme_overrides.css"]} -else: - html_context = { - "css_files": [ - "//media.readthedocs.org/css/sphinx_rtd_theme.css", - "//media.readthedocs.org/css/readthedocs-doc-embed.css", - "_static/theme_overrides.css", - ] - } +html_theme = "furo" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -173,6 +157,10 @@ else: # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] +html_css_files = [ + "css/custom.css", +] + # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. 
@@ -345,9 +333,9 @@ def generate_doxygen_xml(app): subprocess.call(["doxygen", "--version"]) retcode = subprocess.call(["doxygen"], cwd=app.confdir) if retcode < 0: - sys.stderr.write("doxygen error code: {}\n".format(-retcode)) + sys.stderr.write(f"doxygen error code: {-retcode}\n") except OSError as e: - sys.stderr.write("doxygen execution failed: {}\n".format(e)) + sys.stderr.write(f"doxygen execution failed: {e}\n") def prepare(app): diff --git a/ext/pybind11/docs/faq.rst b/ext/pybind11/docs/faq.rst index e2f477b1f5..28498e7dfc 100644 --- a/ext/pybind11/docs/faq.rst +++ b/ext/pybind11/docs/faq.rst @@ -8,9 +8,7 @@ Frequently asked questions filename of the extension library (without suffixes such as ``.so``). 2. If the above did not fix the issue, you are likely using an incompatible -version of Python (for instance, the extension library was compiled against -Python 2, while the interpreter is running on top of some version of Python -3, or vice versa). +version of Python that does not match what you compiled with. "Symbol not found: ``__Py_ZeroStruct`` / ``_PyInstanceMethod_Type``" ======================================================================== @@ -147,7 +145,7 @@ using C++14 template metaprogramming. .. _`faq:hidden_visibility`: -"‘SomeClass’ declared with greater visibility than the type of its field ‘SomeClass::member’ [-Wattributes]" +"'SomeClass' declared with greater visibility than the type of its field 'SomeClass::member' [-Wattributes]" ============================================================================================================ This error typically indicates that you are compiling without the required @@ -222,20 +220,6 @@ In addition to decreasing binary size, ``-fvisibility=hidden`` also avoids potential serious issues when loading multiple modules and is required for proper pybind operation. See the previous FAQ entry for more details. 
-Working with ancient Visual Studio 2008 builds on Windows -========================================================= - -The official Windows distributions of Python are compiled using truly -ancient versions of Visual Studio that lack good C++11 support. Some users -implicitly assume that it would be impossible to load a plugin built with -Visual Studio 2015 into a Python distribution that was compiled using Visual -Studio 2008. However, no such issue exists: it's perfectly legitimate to -interface DLLs that are built with different compilers and/or C libraries. -Common gotchas to watch out for involve not ``free()``-ing memory region -that that were ``malloc()``-ed in another shared library, using data -structures with incompatible ABIs, and so on. pybind11 is very careful not -to make these types of mistakes. - How can I properly handle Ctrl-C in long-running functions? =========================================================== @@ -289,27 +273,7 @@ Conflicts can arise, however, when using pybind11 in a project that *also* uses the CMake Python detection in a system with several Python versions installed. This difference may cause inconsistencies and errors if *both* mechanisms are -used in the same project. Consider the following CMake code executed in a -system with Python 2.7 and 3.x installed: - -.. code-block:: cmake - - find_package(PythonInterp) - find_package(PythonLibs) - find_package(pybind11) - -It will detect Python 2.7 and pybind11 will pick it as well. - -In contrast this code: - -.. code-block:: cmake - - find_package(pybind11) - find_package(PythonInterp) - find_package(PythonLibs) - -will detect Python 3.x for pybind11 and may crash on -``find_package(PythonLibs)`` afterwards. +used in the same project. 
There are three possible solutions: diff --git a/ext/pybind11/docs/pybind11-logo.png b/ext/pybind11/docs/pybind11-logo.png index 4cbad54f797d3ced04d4048f282df5e4336d4af4..2d633a4d0c129d6bdd94b4134c9516fdc92bb192 100644 GIT binary patch literal 61034 zcmeGE<9BA=6D@$ow$(w$9XlPnW81cE8=Z7)vt!#G+jcs(-FN5xopbJga6jEKo+lsn zSkFS$teRDG?U4w1*&m2-cyJ&fAczv;B8ng&5LO@{pz<(Kz!R8?TV)Us%rBnG>duM= zZp8MEcBU5ACdAGj_9nz8?iQvXAnvP`>1K6g-yMVh9a6hPl=^eZ3Zz{_@$m6a{l+3q zZZ{hAoqJbSp+MQ$0sB7nob#`1%I~@)nlRh2a!J8zTMpNR>@bRu&u{S64&m?nc-@D+x(SMJUpJ-*v0amFD(c*X~896E5 z9)J(_&w2?wCToJTvjXFFER7z8M#%nWeFxTGvl8`GDrA zBv-bro>tf3cIi4HwluuNhS~$LpTl4{1URtj5K$ zTR9hUC-e%pevi-AE%ocVK2s*=vpEZ+X(eHfR*}fw8gdg6dK7MU4ou8$IjlC#O?b=K zi|6$B{bn0GbtLSYL+Kv(il(E7%jjwoC5s&l5O~Oqwd#d?&i!sbUDg`8oYLZUXkE_a zk$APsvN;dwqVU+bt!)lr)9S8T)oMEo5rbwkVhNy9qUxRfji}{}Gen#89$)UCpM4Fu zHd1 z2I4go-M=U(U=NuuM9+Txqki2K_GuA+ark+EJcPE)Kon8`sK6lg0C<1m?{O+j$?Fr%jrH3FyZ6ngjOZ11Qz2-kWv8U(;YiW&TI#ZSy;4tg?pq&2D2R6D>^o z_b;>pqfSJiw8*sBc$41yJQsT;_~Hw%@c{u@n$+!-3I0JZsPNGkX}b+#!w6sIbS!mO zXxy|R>A_)NSMaj`qz<5~Q!d*+(f z_-sjRy}Njg)8vyR7SO@KP{L@NYKK$N&XiJ7?sa%t%PmH2>klOrLnkolPJfWB3mUDQ zMg=w1!<8ElPSbJNWb{|l3#PVr=y%f27-5$t%Lb3i^-_a;C!&)J%C_X;CHfvF`y` z4EtCNho&#e1r$`nhF9#uTdYbO$4hY2n>cF0Slme2FZfM`mGIRK+3DLdTKSZN>It@X?Y z0x--GX>r`$kIb07SBf9gdxXn=&F{YRxiH}TJF6Pe!}Jcf=3#esRCS!(-i4)st0XpW zfMLw+g$c>>u8pv$`Qt5csNDZ5ZX-2I3g(ojoB51ZAo@)jtKD8m{K!(riX;N0dZTbr z?`Z!C+)98;1Hs0G)~JT^i}CCb$i160)tfZ6bQ`A5M8;NCF@upWCM=lLGj$x68;t2R zYyqb_ecR7MZT4wLog*yyQP@-Ynf+{vrAum0pS*m>*R{MC6HC zzC|kT)=Y`~L+~PgDS2#TX*2Pkm@i{vJ2z$5EI8e1-amGmNZo3H3T(O;20L zumOQ(I6P7Z9c%DaeYCZe<)gHqdy`mYS!#>$a7;8?DhjePdOhg#a3EPYcC`^B#jbs$ zJsNB|LYKMP$vzteqodjf_>jr0ur&gf|AmntHO&nsd$3rf1#Ei?)vZY^)OVsyP1Z}C z2cePEaI$+Zc}8pMGxS%!+JO6Ze0J#;q;$wLi6LtG4)p$(Oi+kj>q>saqupxypQ&Y` z@7Ez2jNmnFly~(8?3RxeS8xyCwjMVQ{A2J+FYfMTjTRYMN+~cFrwN8Tv1Js8)|#c- zORUIrBl_g*4z6kn(LhtjcsLLYF67UN1DZm7+JB{ zZ?p!7D*X$K>nhlC3PZ=dYxTBqh;;_FSN%2n`U>q_&++*~y*PTX0x}_#p(vR)Q#)E# 
zl^pHjjosC-4eiKt?~m`RO2Oe^D<3#An|laGs`fh-zGx2wA4h+u-}@ynp-~Gp43-h@ zSORmYxmQ7cPtw=GjV-!lk)nwdrj`zZtD1qnNyWm)DO(eUWth7x zVnfymGLxKT3wv?iU>%82Uf($#l1Ga9mpym~IH*b7pF9XwH2C=uMR-S0pV{8ZkzVXb zVI*8w6Z~<4Ko|UPH@4wcIm2J5>~59Q!*~xY)!CA8x!j6nf|g`P;ymnk*`UbAOc<+U z>4p8jQtrKmTR&;hv*E5+RPK;t59Vyw;?0YLUJqh?9eM0S_5Jzcd|7al9ZM>bi2B%p zebs5Ganh;}K~RiK@pgJ0VR-fRUnt45AO=3mo&2}K-QE259o~sKXOypoI7F4nPgG-H$mP=&bQ}`XBUTbN?|h{(#X8 z5K{*o+kky%35|x4NUFgdpYL5vp(3Qn*)4EQnj*&TUUjm6$Cgy{SYn~#LIQm$k}#;t z>A?MAN%h@8WSSaJZhY=Es*l0g8OvW#Gj&@&qACPpWMYbVQ0ETbC|tr4$tn}p&K@!v zgVAugEp5T}3lX%T z=x}fLO{=|W;8ge26 z5XWCesno86h=cmXI1#wLp}EfruYUykLEKDRP0jPD zNe-HWRtFf_g1;Y`Q`|-FaGkiGi@R+x(v!xy(S*}IaJr@lD86&pnC1_Qdi0QSWJKo9 z66!jkN12{Ez|5!1lg4JOfETsX@k+uXruV9o_&M^Cw(SGczJ?*Gord-K~TZnj6#Q*IUm`sBZNY*jmG{H&GDm7!Cv$#ije3<=-am|s*9h) z7xeYJT)UKHmdCkSeop;Ci4&b={sErW(mj{Nl-ilwW~qY8zu?dV4B(+0#$M@;N%4hw ze?@~IBU4eF+UtK!7cNms6@}AQetX;A=+}a^fDpF13!SCmrZ(9VCA87tc!hD<1L73y!)4c&NY7iS&C^NRAK$t8jS6~E!3`OlOA@6k?JFd$YKF+T}LcOt2BflWz z=%={(b|Ly;Dp($BYF?97n>6jejo&Ck%(dcSI*)|H?>nOAJLDy^BQJoky&s$XOR4Kny=Htq;+6 zQWMops<}DPwf&pISm_E@+PM%E4EM_{0iCT~MZe7=q`u)PSC5fDO!gHH2#L{k=L7MQ z?p%Fd8i`))d_FORnn>0LdOb)6q_$l_zI7o6@3{ikmwO0lG!CI}@byJhcxeVb#dE?l zYG#a%8X8XcOmUROH7cQK5gKU#=xK z#o!eB8>4a6<$y)9kX_S3MNfl>l4viK9O6i78lq;gK==wKXPkwPB|JOJ3(lKdCQdqc z2~B^CCizYvB8!91Cp6@Yo%EyQ$z#ns8v2aOekg zCUhS0R+#j`dfIcxM(=N3q>M!JdRP0kgln~rXz8y(ow}P4l+&aR#ST83o)1CXAfj{k z#c_HKU0JaE(-F zNPFT;lAGwr0pKi@sY+-}!hK$k)l98l4&eB)e%yRJf7edo-WsiIWqNwJYk1-a`8R2y0X5Ibl+YCS3E zm4}N6Rv~HG`9qrNdp$jBa>FfJmM{?N!La3{)Qo|J!pm+!jm%zUTo(S>BrXG0VTASG zF>LT(Y1Jw2#|uO2lXCD~2?OE(M6-uzavl%P-N>>EB&P%?0R zOFE?m38v^*3!R-MpeR9^V8VdNr-{V2fs!bD=WH9kP>K(|K&2_}&YuUiu1{NF~J7O1pObdAwiM(h}mD*Q_S>EP~VL(xW4Pmjo^khpPa=UxzVfo6Z9gEj9$@;rH|&+_h0LC5}wApT*H4fDJB(>lzu~LrGRv zzH6b%kf7eSE~W0ALaHJ>#^hxYkU``ck`-_Fnw?|Fb-gA)fT90^91?Xy(Ad@pMou$D z!^8ZdAxK>MCzzutyhK$z&o?J$rl%+H4tiqx4c#^0|8AaZ+vpTm4Xur@rlda%;E5 zG>LVYuxsxPXTrLJDE<>x*ql@t4@_adgSxV4h{1Q>oBm5>buz~5Kf=U)=pd_z)PXEkj^Q$4KF-Vc;{LT_eUCy?CZx=?uY`Q4 
zOA&ap_-sG9X?e?tAw_t}(?vc&Z3t<&vPtGs9yy^lBhIu#g@&{jPX9!IZ5w6Epx!o$ zEWRcD%i9o3&sD;o7~=4QbL(FKScTEL>|D^#3>O3In9gjP=86RAc-q~gI#o4wXq5R$ zH`A~U)74YsFm{5!NN?RXg*99qEzyG_Qj3F1jqa zv9iZ8(thgOFD^&WZF!=pcB4BlH)dy3C*&ElY$3`_JVF$TQu8w)27M`cLFk*>jMEtC zp@lXR*ft#7PNA+BMKYex_?cH$J{ed{h!B2k0Lfai12X(RLm)t3s?WAqK%(Raz?5`WZUj2ko2N zBbylo_n`}A4gCp%^aa$gA*jnI`Im)bl=MmbL^fyRzI|g+-Q>>VY(WERyT$8{w82M7 ztwwdfXn_cHQ*tAtKKK>LC(JL;Go^yXc^MVeK71#`)`ua9$T?6B?3OjwIuchzi3pD64&EyR){!tsCGpt{c375Swy z_LTKJTv(&0E~Q+nFkm`c8~h^}TwyHvrE37~2c{i$(=5Jes~nqw)wph|Qrj6}ahgFg zBOVlfp?HFcfUFiqO87do)l+-aM@qIfETltI+~Aro@k%Q>~iw}QuD3QsMAQ(v);c*&b%hhKNuwM>wK;` zJ90)i=mdx88Y&VUX||kRKyKqzj-Zf`@)$@O)vU%hw6oT_?M9#9LFiB&qf$HPrvdRA z%24Wu2nZ4gA_$5SK`1rw1lC?$!wCcg9{uwR3X-0Q13ZLwmXHyJ-iN~i1E)i8iKVa=6WW=TB6CNVI$|1dHAQb#9_1Vzdwo)3`y|+|IhzVZD5N3g#q+)Mb=zY z0Xh(55DtI5fo>O27_g(bt~U^&4&(o?N9v=X{gEAZ$dmu35c#L1#Qi^486V7p!4P`| zP%NmSTvrf$x-qUUz(oG@r)e^pAl6F%YJUHQBaDRxJ_`#ASMEGF-H>3Q!9pYuV4%Rq z+10i9r5ze|ssZ)}4gLSYpl~RrWlLJoR9Z(z=WN9+C&onB%gd{xtc+SESqA2M_S^p( ziNXxI1jDAHqEZVEjr7N~uS}`L5D^ivOaz~S;rr+DByp1kQH8{xD#Hf8OlK6V4w(&)7~A~vvTvN+(Z3bE0G6u z{O8W8bC;(3vs7|nbb1!I+n}Q$u&MFCj%BYZbPW6?WMM70Ew+O_b#ni+191+%Z1#at zdc(IhkH?FRTJ3nD8dws)6~o}T*`b8P#sn0_!SD`S0uRawC(?Kr_Z-pIo^u5-)EBog?Fii?UwpKZRFvrEII>8ykN zM%%g)gZ>ZVBan`k_kg1uvqyE*)P6Ya%ob^$ek2<3TbazE2$`ZHv(nz5ou3!AGLn$N znl7RUi618z(`Q-*iTEG%k^iLa0#0l(n?oUNVlJ%^hr*oUzAvi$8m*iTk1j;@IUF}` z3aymUpEx78Fi!N>;NaT{>9^WqINFe}!5GJ@xG)g$$%?wZLNEh(LNL??h?G_rBee1* zCVrByrtvYM)W8&`KIes>%M;0009&@@bZXr}xVlNfxG4OolHq3~LXM+S9|1#B&$=|D zor)5#_TRF{N+l-t6NqLKq;AA2V(S;X{OuPEHg*sCp=fgFmN?V_vbh zG@-9RvkJ5Zi=RWvrWNpX7X>EtH;ex+7Uhy~&=-sX@@d+_eIpGzj?7+tlpT9c3*=D> z;#M*YIC7sc8es2Djh_SKl>hIP2YCk{#0>l+Y`yw%f=DD}W>%DAVX!36frf}A!S?+l zh?FCV+hjIb9Li~I!6?j1=Je7q)G+^T-lCwal~JixDqDY7kdr{wtZ^Um;k(cSF#pw@ z7?Xoh=o|k;gA6MNwrUmvhB0~~tFQP9+m1cX#Kc5;m%Efy6jyn1hzyX}AbD=SN`9Ip zJ5OC0hNLK>m|UL6v*8Q|G!8QzIEkey(%;+Xt9d0kYQKgr7|^gJ^&PI34hcd3XQzLE z&=fYD*sL}E>5uwYB9OgYM4XFlu+ZoV4J#va^eA)uw-76B4h2p*TnZQro`w 
zxwW~u{G{1L$J>9$Es`Uy1@(SdF>~tLS#Pqze-|D_Z)R>0M1ssP%kfdAn-9)yftLjk z#h5eVByDGBH@PI6^q3qbBk;+!iZAAt^yBS8CNgxpeG%)>rra4)L+>NQB0+$e*-~A4p;*=kNC0Jpt-KIJD^vpfI%N%L~JdK=PR%-4EUX zrsNXQ1p~}Kl8Xzn4J4osLmtmWvNo_?4HY_&CNkgU#l_I3CU#{NmAu+o)W*if(yA)i zlZ*fX5FjwVeE9;v>?C0TjfedG9EE|6r0~z8>}TQtHk1eoI<1|$j~!b_GlDvp2 zErcL@U)T*y3baOR#O#EUnBb+@A(|9>1~K~l~Fr18&u>?at>W()^5o1z65 z%A-i+0$!@XQ;mLoQ;allS+gF2lK;G#7M4LkOGF>o!-W+!;LMfvlTJC)M!n+v$7lSs z>|~<;ZV$q+68|Oy@-~YNoa6p8akbcDNyN!5`#uMMMs(s)s1kgT$UbEa0U)}09bM1nj- zZeHsZKnzJx5##B=EQ*P>6z24q&BT7+R@c^y?(gsG4ODX$)EIFJXRd&07;(m>UeC9> zbIyMNhO_6$1cGk#IkX-~`>mW!S2qBSQ#12KxKse^9-f-o*;RAZ80Y2$?(FO=XY+co zBHy6v_4oH9JeX0%Il~PbiDQhFs&wXr%Dn+#i@k|5)r-YU7Z)EWkE;uf>lMi|HWcFHQ}^2=b@l)Ui!4` z114O^ky?Vm>)+D`dH~?S%&3AuP6F~E_l*TT0qvmxKL*@DlkJA4y^xHL5AQ?>i+T5_ z3#xS5;i}%{4Jkcp5h8(1{i7m+|9=Z8+FO~mrov+f!JZp;{CmGVEERaEE1}GG2b1G} zckb=HRAk16`j0F|Y&dWA<8YuRLSO-2IUhGYrvMug@RuJ1jeI2vtlD9D*cCAAS+@v$b%;~ zx7t&bxM0@Fga7Yv0adlr)7tK1$#ksP6aYrY-Q)C}4BV(8pOqYrrt<<>@*sf|+IH(R zsow(35O1G6+qO(p8IQwFwERYmXpJf=68(sfsq|IG8(l%7ZBHKgBw=i_P5 z+?qD~)uG6g9${J^@Pwr0Cr|DWXM8p`OOqq8PzlMS`B+ww8^Ny3qXPi?UR|7>w@>x{ zc?xDPvSEJykg@)gH7)xk{^vtdb9$b!0N>iwX<4!FsqiIM-9-PMhSZyFb!K4MK>jm3 z#?dPLXT<|UlC~QVdeEonuRRZ-l5gvJZzyUo7|X=+bB8W&ZbP3Sm_JvGJm{kh`WhU! 
zE7tYp$gx#kM7r)2m6iJ~*Gd z{V!hfj`$DzLg1yU&ecDixNdI>*!_PX$TaL4VN&_@qLMR3LxRF@U^k=LU5}lYNx||; zdU|?}2YnzcpCyekd!`2?Hq>YJK@x0b#%^V%5YAR)Da4Hnm?E?49pLY)Wuw?6>DQ#+ z%VM>m;}3l$6}?&3_wc1)yii`j<9+`H)bBZ3q?`ZJjSyh22?2hzi0w49?B0S{K260h z`HABpQuhs2WmG8%9WWyh$o{4vd=|a11_#CRg2#4^U{~M3<%Ndlt(aL^u{WD+G91^N z{x@5n5A8%q3CvvybKXXWhm%6abp9XUfn~Qm6~ugBPZ+h{k{h-pLO^c7v(y=uB~no zUn=^o8GmHg=9~O|?4gmgvL&$u*itt}gyvw}9QiSsDhuKQLnNp@tUTF;rk z)L$5cq*@P?p%JWG9Yl3Rd~}Z!8Ev5Nj=%*{U^J6=u}Lz9;WrDJEBLY+<319CJV3o3 ze6tMA`Qcv-=1#O0<e5KbWAf$A}B8ABvr{u zU&2DDG*gBX20ERY>9f1)q51qz+Yf7EankUrTte+(eswi+h{&(QnA@%PHLK97TkmZb zo7@157q`ee9(w@q9IM3}b@=94_YaEzg^s_DEBYB0Do14}=Hp^Fa#V_T7`JVCFAM)G z!^WkbTMsLNFFuq4gCDLaF9Ou@zkI^{+%VvHy3kVjiC4~U{4Y>qC>KP%#}}ZP$H#K_ zvo5o0G`smSR;ZA=&%QafiI-Uu$zblt2K&Xv;z~h(^clnLIeD})WSbjywqJN4GbI^-~cvZ34MkTT7{J1&=CVxSl_s5P5P zKG|er`f>PMPMS>)8snWbF=(oIWmRL?P^L8vh-D_=+}xF?+*{8`*#LRO+jc9m{m*yX zpdhxossUQbDirT~5mhM3^fmp)D7j1ETu1&EvdF)FIqg^r2HIdjbh)l6)MN~f=ipqkBy0G3**Vl_dfi?;nk*jH^+@XW;jWS5f1`KlvhLn zSz)@DID$1$7dlH8`Y(q2J6QW0=P!Mawss=QsZfU(A%6PM&?LJNNd2h01&Yo~p{;## zw$Y%=%Fq!i?ccJ-om%(;ju!$V0tzg%78y7jwQ# zB-thHWg43geCD55mX_PDLo8<-wvTK%Y&Zy`gF|`pFj-ky$6!H!-cXalK=smo##Ri) z4Pr=Lb&<E%~jrqOvk+sN`+1ur8K$Pa%u99UFb6p3HA=BZ?N% z*(C=kAiIibIe=EGk^~x=P?|%ILqf0h2%i;O%md(8F{X6*lr;+-C56$0J`5{XEGygA zysr|e;xL$gPkW*jx|RV8*5cHr#-k%F5^kN`EX!x?q`qbs=`pLL9HG*lLsmq^#T~kgz1Lqc#I7iI z*GO9->oG?mEp#gw#zpu5=3zUI4?7oND@5{7LHzBvmP=7`NZKHO0kH|C4V@D=TGi zAxsJWN(BVm(1=bSDMCQ#DEeIo6l!j0~k^-FPS< z76gd>fBr-Q6d#vlLX1Adq5LLgqpv|#lI46PZo8jVynJ>EC8|TB59u!*ULB^T0*o}% zmk*ZJH}Yy;NTzBT%%iY=1Ml06E!t;m1oD9cxA>|v3qVzQnoa3aWe)%?FNmU$7+TDf@suZNjqq-b@F) z!K)Rmw$&CW@VbUWK&S+z&5fB6Beby%jqa0Yr%zVUvkg%aK_zY-+Fce@Ar zyVvffI&U%%kE7SiGc%w$Q_$U8)z(fwMQudWB@WF%%+1Y*HFDyxPcTfxM?JVix0WL! 
z@SW*caO?k?(`#Q)G?Y=XcPBWT$jR|sLuC8D9M`3fOD<@q8pgPSl7jSDu=H7{cijEW z;+PN~8j)(L{SC&Ig%rpaYJHj~>xN8LiCSieW3iW@9H`F>)AByp3Vc1N$DF>B?zuNw zp=2ZT_&>vA_1HG|8P@g~vkGBA9NoQ;?dZQ}HShenM(6Z91MyclR+5%0Rj7B>ARiY) zL>DIWX4m0!UUBjpwTsVCadyl@iUeL-Bwp)2n`$BX}|?mW5nNEVTE!KDwSB` z7>5$vxcD2|5uwGk#b-h>m(&Hbd4^=*&<$>7TPpq<)(=kV4tO?{)Tl-Sff4Vn21hhC z2F&+<_WQF37Pl_`$&`j3t+CUNL=Z^I7-aPleDzZK%Bw0IXG}~?9pgUWq1)#gpuAEJ zFwAFV^}i9-?f(7yH|w>lJfx{;l12gwAw2ETt)x-yw*__S=rC^_IY98pZ*0V@tgM8A z0K0njHn+0cQ=b3r8&`-GPnZ?T z*h{EmG_{#@V6a`dQ^2mp1hi|}9i4r>jj>4H2_OnP9jvQ>!7wJj4ycDrtXppk0odd5kZ z{p$WYoBQS#_HtdKIZg*@O27W~T`OK5&@+NI+TBP2C9APf#VL;h6L1uuCUCl(U^H2; z*!@yxMWQE>x{68&utY&~bF=EZcVOOii2FkQLVQpT7{&UCvV}FCkj{P@GLsqN$Yxqa6LiEp=e&gwWQ>Gqk z$%bDrGNC=9QJf z0-{CW)+NI-srH5~ho+_`3If#3{CuJ%E!3zp?PQ{<1#-Nz0{X2JBTn$r>B66G6ZV(E zCY!bSf9r7rJUXrieM*ooVaP;-pUsBq2dgFA#-;J^htrdjzW2v1z1gAPB?XBSA=HcJ z9V-)mo%a(j$FUN%XWT-g6xB-Wqr>KqeSqo?B*_$NV zhfO zLQ4Uvs-z9Vh(7?EX9}l4iwPe;(A&n0=R|1yOXYU4m9OQ?V2eh5K0`({n5`Bs@@1Qa zuJ-uM=`Oj2;@a{!)ba$B`ovdYmP2}^?e&3(=(1t`$yn~id2P-ZJsB2%*S{2;4khd; zFy@Yrj!l69i;Joa&CLZ>^cKU)5>64=e<5R=C$3)1nUGjo_~>RirB6Dvnydu@vriq) z{aLeY8q5bSXS}?)EiEnQ16;rV95_~_)2Ysq4?kjKW8<{n7EqX&vYa`XFHDNQ4D&rSpa z%b|<`z^S_G3L9^y#E=%HNF)fxan)Fg%Fd9@KmUL0uLxbw^ZYv(gF}pNf4v;uEAQ1^ z*PtE7+VK_Iclg%3HiLejnAo>xr7CFzcsL#E%bGy0+fov^a!Z?%B0w?e#J4>*k;T@oS@JXs)gNS!S)p|rq>cppC!r>UnBafG!*EROBe4oH`W{NPzLkFL4$iP>ynXG>!vz&6XREXPLVlC)Z*4)5O%_iR{j!+z&7QI^V%9Lsmcil}Bt z3k!qGW^sLr@`017;v87qTwL>M7!kG0<8%j!tdj5fvaPyjo1ORoF^N*n0?J)g@}Y%_ z#k{SwFBg>*H!6eI%S}qRc3cUhh0dr_v@hi^afn#{4|hRRaof!fjoT$!6DNa6w$>lI zuvzRzs=f78M~s9?e_{~ot$-^G5-M3DFpw#)%RYYZ!|K|D;-u+gXM3)!mVd(d*C|^6 zFp`pNqeaB@8#T9`R~2?#N4`#Uv?isp48_1Tqa(E=n%2XL9Yd5p&CDpl!@y(F4LrFc z)S7F%rbpsVn6R4*Cu;AK&j zG08p6;{->{QoghPbW%$S^%ylCH%U}M2P*UoMb#L=>9HXf%9n)ng{5|Vrl{~}F$3m7 znVbnRP^ch6O4+a(4o0r)<}gQBFlAl#%!F_@SSEGZa-fUp3jl|XDP;NGU2K zMzdRFe*{_nd0@HY*_-CMDC zKBrpgU8YPgSpZoD(rT?WT48s6+*ez@q!-!JOV^6^$7mO8u)#w3XjeoGGLMxcpxz>9lD&Y=iS~@53fs$`!Vykb^7rvt%&DaG$9#CZF%KtY|ZyBgLL8rO?1u;bEM9H%XH3%OM 
zPjc7xoDI9J2@B{@bRuVzp#;BP3Sd{M&*i}kyU{GYh^T6n#q|HtUYeOv_-Vw1kM_0A zy6dlF>(yEPRNv)eN^m&e3Qv~77FGM~q_QvNkDmn8r4QxR)kA=SG&4P&yJ@wbX?Vx| z-RVHOx3fez3<-Fd`|T^eURTBOX>!|Vl0xKgOv{8Q`eTj3NS}gKgw3g}!8U?ELpN5l z@=Bd5Ly;N{1(Oex#h1;Sn;C1XTaTBs^+pZRKG}2b+IQ~S`!~{TPMr=57!{tMnj}xk zfaOV=KjW@C;arps@O_PjNO&M26t%o8nP8zOjm%?ZpLU7{o&MjoRC%P`updJSBK{XN zK!@*);v5;DiA(5qQJ11(8Uu}My1?_#k^`VMF$LrVLF6Dow86V$_z7DL-H7H%9T_wq zG@&x$uV}b^hmv%0vJPtexd}V#Fxom@w|ti;1%GO2&p0Y@i<0CXF&; z`FG?4>c$fD!Td2DNWd)sK=1|nh(&DaB}qO(MOh%ae((2z7R_I{UG=xReAGKV*+90R zuJ_Wre1U!)JAS#zmjPb$u3LA|y$~NPt#2K-dK>aXkI+ z?bR|iVbFqQz<^% zDTt*!p>jU+;$mo_7gfCog!^JAKOwd@JNn0k259?0?wdlFTzEY8X*P`np>U~&LIlTT zXi+&v5K@nH34Y3iyzJft66XS+Gh_T<8W(gl)p|(@?o+K?`)KW574Q; zyD`F#pL+3r^x@_q?6{zSOzuh@lKxps78Mm~g9mJczX~PZy#NBW1h|n}_MddR6Mr5Y zgk)oO87w0Q%F#7v&K)f>y~N^T3TJ2ML}}`EgjX?A^;KBn3pYh7a{1TC%b~?eE#QRv zITk*@6@XyJh+J+mn@rP(f6Iq- ze>mq-UUag=M7i%e<6zSx_t`T2@zF*=HnK0M61DyQ35s}DopuJ8cCj0++0mVxrYe7s4Y zfePMl#Ob(;_Ej-7k=W4K7>Bz7As=+VUmHgqmSJTj>`MIl0(_%L1H$ zNV&1?BEJ?$Q~7M${!h2{6${e(;Auq0w160v-7`F&5D+{(bzIY=0qwz$(^cU^mo;Cs zU?R0xaMw7%uoaeVRG{bbk%q&a9%C}XslLm1Mw|1UbKGSc8vzOxDXjYWIhjstl$}@& z>hJH`Bbw_*`bznFuSZ)}jubZ)_1z;km96FcB6X$Gl|V|l?D5mn%hg8s*N1Z@d?9d< zP4_)SnJp;*d;40}Yi%wx4`(Y0jtCa#9Lh-lNpGveGL+>9C)?(OQz~ZW746SZ)~jzC zwCXh|NMT- z$`)-SF{-!)y>^q3OyJ3h<-hd_@%1PrT+_#3w{5JFTU8|K;o2?u^_;X`0qia z_A4>hJ*yb}qpQ{!#@%=%QSnI-9J)Eq^_57^EbE9Agr%}qU?8-aM!iMTR}};VH<}2D=3%TstKQW5fg^tkJJSq1qLrZ)IggEsA*sQ`0+-b z6*fHJ3l{>u1_eY+d;q{okGeNwzJPRby zw4Zoz>D_7)6XozHeLs(4=qi1FmCu-#g?g8rO+c=d@z7i{7Qz>(rBe&f9ChmcHMXeD z$;|wdt+@j|$NkB`*|}kFSjw+wK?*xJN{o?@9jSc+_GoviepQFl=Q|uoi^s$_?~nS} zKGXWXotpl?&OOHs{icn5=Bz>}!4>bO(iGrdNl6`y3_uD5TCW4c}?JJ-u{u5-@NyzT>_JckxX6A}DRTGCgn+q1K=G2zHh&(4AY!rwU0O_F$=IB@k@77AQ0j*b`5X*Jm+$TDGJ{&g<8 z(Ax{^Q;QUmIkwn6cdN5nqPJdcuoxxg2Yr`~r-rDtIbE!D-;JPT{>G6HT;dfk zr#7`-rAhU>Kh+`&U_h=rO^NX=KY<`Xfxo=IUXR!Ri;+M2CQSjfOQ>Q6sQmxVt<*c* zNxJTqOPJg)2$s)Pmu(g!ZYZ7JTIarx9B-Un_FN)$hP&CdfGYriCd%Ppe#Z|80{$}s 
zq6&&AXduetXlwIk(CbAfzS&e~4AEN+qM4klzPB|_pe`l!ER+Irfgo}L&XhkRm7M>q z%w=NXGbR?T-kD+~n~Mh;0|rHZKXr~i`%!`{SVomHARq?=eckpOnKs;Y{2 zCdFS@qAjl%?MY6N&;tf>88e^sT{e`Le9#Aod9*(1U^!D^E~kssuQk}y%Hm6rs#GFg zaBRni$3W-I^Ryk5K4B4}xms3KA`aZn0>Z_t>D>NaC;6rxkU`CRh@PcRwokIn-QOSxL-M}j>z61wcSHQb>Fk$_G zdY!ADUZr4Z1*kmAl15CJG4eQnnb(;5qZXH`rsn=?ua+g1i1!7vE9HreJtpVkyvsJA zH6)tIg*MMilY=UOd z;Bau)5Zv7@NN@-q+}+)SySrPE;1Vpjy9Jlv8sy+E_u>7%dv6s#Ql#qS?9T2?Pj}BI zBw$qd;T4-cRYUPX3u1FN&Ib#P(--pWSiRdO(tA!b`Xb3I`U7NPSs0mIjgEvo15O{~ z7=4PcRp9M1RZl zp2wvm(nB&;>?F%hpU;!3ai;pDZO+S4Kbw90u$N zj{xIBGiUO}+PcCCT0f+vOrK8ma{_I@UxM!j#Aw?u6(q$3LGN2ROEIJ$x7Y<6(q6wn zzuZoi0)QH2c=K;wtC4h|X|0E?jL6=tffe`ch*)gCoM2qPmMj*37U-ujJLuS1^BSFEWGSR#46&!8rAz4?!L zBQ7x^H2n{3m+r3g4PT#IYcz#G00<)@i!@k>LOA$`80&Ox{qDJD_o>7x=Q;_;- zJDs`nmX25C+~VSn_Qs?7%5lZ)k;}KPW>u`B4e4sws+Xrb@Ggvhs!x2(-tuu@%CfL;=7HWTXYD6Ree4DA&^=57P2s0Tv%F((+IX3n$W z6{73j6rzTNKLB`nMs#Rqk%faJ`)}yTUjGDl%^<&hO;6aFx4&fGfJ!rDseNDUrPBD@X;#deAX!o;pg!`7F1sGA^xp^WTt?e5x;4iUd`lQP75%UOudC8x; zHa?}*K@kk{lCo*3xq#$3aa*dyNuMwa{{VeWI~qDC@HicGkM_nq?eHd8&%>TmU+V8| zM(TGV4Eb#IHh#xZ;6RE>?1vtP+7+JT|E9Qf-+8xL5VsNcbmqH}47N_XQ@P*pUk$Oi z=ecHgT*wI?wrJ0;FdHB$)|~zBL7SX(U1@t@d>OOiwFuQ0UQMB@TI%`>8Qp zJ-G}fh>v6;nHz~^DU<=%-)vWGdZo@74v1Z;KoxybGpxs5V~>@AX)mQ>l@txwA@z& zzx$%Re6ZD9@|1472}vav`pyh1gFdG)?eWgUqBjc{!RHotkndKHwcnjamnUl1s>L2|DLZ3uW1JT8H*>yBU-R3Zpu4TActzR#M8Y>2GO6!1|dz@w*( zoR6>@R<_@8W%K)!Bp_+93^5rQ7Gs;KIO+cbl9vZfXhIo6lzj^VxYHK9QXPrNjS%3k zbeQ0XPzl%DwsJ=GE9e4PsqDYK6+T=6BtPX+%S;d{u{Vj>FErK;CTo=Q=@}#^^%$Ce zk{>_UO|gE-ULX7mz5^%@Xn-FDjQ<9=GGg*50wibxB#J$-41n=?>6fJnYZT|@V{u5U z0}a+!`#+XUmGe+@GYK3xv#_#}a@d}zGSNL^?WlS+8ng=>BhU6%0^zP9%X{eKsHz zm(%!>!M_;uX{@T2u@mBeh$ut9_=(uz6D6Yn`KC;Lyff~14>y_9X*I+7y=G(ZiC^-)5IF%v z=+Kp~-H$I?li@@2-yc}k0-hk?5wFrVxLVN*G_xTcpUWLI!}T?p799nE`#rz&f4k%H z+s9(D>(Ay*nLc(c)8Rwo9@mFZ#xmr}0;pkw=H*Q%zD;!I*@gliMuHMYjBM>dNlQ_^_4$F z02URYJT#yn-dys3Zd6r>o6Hfa6%N1{XL8u$VgD0wjR{nKmfgCz*dr-9Kp- z2XW#=ki7fp#m^M~F#6yQFh~OsTw3T6%CWW;KyR)rm9 
zkJ2S%e>AV#JukA#UAWA|QNN=*yAj(OP&bHFdM&SeYBqI|M$KmDcYm$uc>nNm>7}nr zHUqz|>6ZBLd7|zfL8xw-6_>81@U`fm{`&Qd?ZQWV`*K&~aGheT+2SA+EeQ>D89-~nfWv;mMF?Em6E{K|u6Msj zwB$!|<$hhT7hLY!%h;$80nML_`9zQNC3E4FuP0e9K03`c(|jyLsjW2)&(@s(BB<+qJg0NYHS1) zM_O=U%<8E+A&NKTgd#FML7k);6KIojRG#H8Fol#il{WHss@CY)m1g0mm0a?)Uedzi z{Co&-2#f)yo^BrIEUomhu8+6&3X9Hog7NxdrEoKM!RCUa0Zf4)|LdvSKt+((8u zeS8x>K-Hhy*;#S#c6@iySL&a6?VXb1R_S%Ox%a7J@O5PW>`4!Ze9v+V$aVt{O*-qN z{_KWixkmM(|I_dk3aQ|mIt;}53aS3k;1kK9|MM!E!&T-k$Ea&#^XA^hZ}g;5_3b?4 zNAM@0MJO1?+y6HKy5>7{?XH)Q4)DYN#R-#w>riZYx=Kz=;^iPH&dRiKcs=5~`HE&M z{?~gwL|6D87iefiO+yHAqVUxLMllPUt3rY=V2mM)@A&4?+Gv!5SX)o-BcP_vnDl#a zgjz6!YKD4pZUrR#s3e%dv&JVKTmAc12#>)Ivg9j%C5?ybwHeIT#;77Fe$$}kzVYiK z1-u#OULBq&AZ(+U(&54MXTPxrk5$?-$%K1JQx?>|NT9EdV9ywrXaw}l$dxAh4W_8i z4>&mOdNp4wX`AZOHLpz2eu~lfBZi1s*xBWl>${Ts&Dv32c^k#19oz-Mp&0>#U%(k4 z1o}P1-!q8wI`vq3xP60a0u`M2!bw_eGZBYax6l**L9))K%V*8+~tgYgT%l6lyHX^oqna0m1ONMZYFNt zo}{f{-|lZ#y^>=i>H5$sl&`Nw65Mv81k2LCfdEdDkd%#NjX&VhXtk5oDNBF>HAr_q z?|$X1a$mnZEdwO%cTWAEHu(FDMQ{&jD)!IuwNl!5;A`2utfV;}{lQ+cQEw$xYteO_ z`dEQYRb*I5qySgh;R%pU2R7uuV{a4Xkl+6irW*SQ5a3#~ov6I`h+3QzHc*oI1deNYVs&wfLdK3uph%>*VVG$%9MsVWAV&BE?&<2_b*z90-W;n0BpRtCw{zrog}IP zQ%ESQQF>gK{m%eEXBja`2A05`{yNL->A1V6VbHAo$v^X+*2u9y>I+@bnEDcpNPD`d zx_qDogP)H#l`q|Y#9-#R89~gZH8KV@+HY%X>l#+_6s{F_SwgV-xZi%Jex^HVu-LPW zwe^^GzNW5j)%vtoq)GIMFN5NKZ-CKn$q<_K4^WVvMCCFnzbQqFzh%n4G%P14I;h07 zkY#b80-tzApjpvBYlI&qZJj%|CkFgxef z?J8645{>l1~rUH>T84tWjF;&lJuj?iD2Uf^cg?QIEo>N|gI;{m|t9c6QC=l6R$MQfk-p`i&>UZ_0e}B+q0DpH8 zoNo{$-nD@iH&mqXa+b)oL=qF|(1pd~LI*vtzyB=fSPoIAdA9HU>%_P`za(kV!i63^ckn5R}5UJl(=wQb0 z&oqf0X$(*;$WiB4*VIU{;Pns25SAien*YNV>q@$old=I+RVlzHLgKlbkd0sqF~Akc zDZ#K%HmXqjI=SjF8$<8-;KU3-^%FPxN-laiDir}tBLWi6pWy>Wc*Mj5OZPl_zNauk zkLS3F!I|2Ezh8z>2XOYw*Jue9NFsdgw%GpKV{W{iO9}U6G(;lMQXz27j#mPz_s-pr z7t4@k;7op4PkvaLhRMQV^+@HkZ9b_d_%iC)Xzt$OYQmj z4VWqH0pD3~XbyOCcey3rFd>oEP{JHCnqNi zSX;k-e|gNnjjYV{(QOL`)gWHq?f=yOR6SNAcBp|L$r<3&9=3eWzuai8p)i_j?hA$W zf4u;Q?rehf*sH(*N#r}z)2bjZWg$E7hqX$xfK{d00PJhc!_YhFLa!l)M?w%Pie(6r 
z2v0;mw}IzGs57&%Au^xrj0qFEqHdu>;KYeS2e(MQ?Y^I0vEK%`>yezAHjkoYXZlyC zIU9-{lgge{g!=4^X zd|jSGZ`|`cb~vMBV#o<%O2&z_e(&<_HM>f1l&mRbm$3h>B!J*?dv?9Pm077lpm2xyQ4L@|eg^4ODqUZouG0<|5EN ztLMzSDOruBLV<`(dP=IQ7@3O&%H7Io6YG`=v%4-6)Yv0HT?meOg+3(>_n;}T!prFW_47LsQAchMvp_eR zpDb`40q1ak$E=uqN7lB}I_r4k#Qga9RJX_6G$Muqv38wXN3H*AdVK-aZbse%dhyxL zdV6YkOKhI$J*rq;zSzLoso+TQJuwgW@4S{P!TRdL!#R3qC577B;v7kNAAkSjgX2#E1g4b zayGWy>vo_{6Y;IYunS@uQ?sSt6{R6DeUWnu>lL29vj9Narqp#mKA$KcH=AUof{YCzIpY=ZrFFp#)$p9J#x`H*9KgPN`=k_xbN00rGAp5v(frMmybU5+e>Klm5}b zlE=ULl%w+~eUk1OjrDX8`To}01GiC(gwxAz;SK=I2BW0-YOCEC*IlI;Nj&d&mVVz@ z-ygr-bk&;mLF2LMggN8FIlTTMdkh-{#Zcr*8)F!<0#;seO?k&2D)KpaFXHROpANr$ zbxFADFA!SW%q69oqY6R72y%`#-u0T!x~#BqBU+qUq@jI=c?+bEBV7-5Mp~Ree+@L+ zFv`i%S+19L`}(ijW&+no@`wLO68)!4dZv`{mMLJ-DL({>a&S?}Q|rPq%nf^c?1f5> zsd}o>Di>@_EUJQjv^r?P%u36nhB*AYdiZ7AvOQ9nu-Fa^>Q6jr85UtPDq}TPoK8Zy8RIfh~*h_fO}a`e!!+tzr6EikkH=3i*Xcq zsR5ukxSW2Dhs^tIlA!7FCDLd|P^c}-H2J*c-5T()Awsa=x$bO5X=;lvGQ&vNK0P#h z8*mIZkim42@Ermy(fLw|)I}Bb8&2+%P-J8)k|EX&3*k0Y%mhQPS;X6h5rzTVq2U({ ztFLk9DI>KDZ4REI!tSrV?}?V*UExpN=|7=Hm!m=>M3SU>E1?5l53i`T$ae8&QTyj3hq`;vCSqeIU%KHa`(MDF~va$thg$5AUp z^rR1*9JrGv0mvb9JuY=X)dgsFfVM?E8{UKJp=bV1U_Srr*RQe(M{aD<4Mfodl-Ux%)bt z!6^-#E#WH!o}6X#`F9F9*WVXMHkBcoG|Y~xhb$8GgN^;h*&pD9hc}QY+;E2PN4(?H zmxf6NJd_@a_9+lCULKalD^|*XcMlOxgv@f7Z;N*N6alRCe2)QZOi%ygx*9})Yr%S% zh1~cJ@Hj-yK>BJZ0?eF{iDH4n*uS}9urr|Gcw$0E0kR@s@i{#`_1I02;$SbxK?Vv< z=r^0yE`MPF6J_4rMu}5+JRX&DcHsP%_RMbM-J6fbLsF1YQGw>_vOdKo@Q?q)$kZ(uvhx6PjSBA|l=$bNG7vxkOQu<(H-f$=Lv zCEf4}zl`6a-bhNDOwe}@TbD1b&pSF#qS ze2~W~gQ&u)!7x=+nK1ljVoV7fC$HrH3NbKiz-70l3Hl$Mr7oeawHBjvmeq7wro_kV zO2eVip9S3__U3LoNy3Xb38!UD!;3t;uov%s<>6qTvqJf;=ZReNpdXgNSLX-%b<6%- zso2|i&CA6N{tMu{stA|FB>A#<`%%NmO{Tz36Sz(!o@Ia+NsylWYJB-AA?ODjk#zwVlLWnQ$avqZ^>fuz%?_bW~!{HSO1 z-U99|bpfjGGoUW%t=N}$7Gv*OF(E5eX54(I!*n(r*yRKkPvvtpQabH<-SXTqgYhIx zQ~A-QL87{`#p8laoSZ~Qu{ElC_4}uab#z6_gwpatkX1eo>}0l}@Tu^R*`w{D?y(p8 zhYht1&Ja39fY|`ZtT>;`o=U%mp|^iS>%Lw6fcWdcbK=lSAZGd)_E2@G 
z+u!@(Zm*Ic2q8@>RQb!$+5zJiIh%9UJw-h%ea}z%!|T&l>@|p>W5oNFX`PntYUMw{ zOKX+4KWPUI9y1;BP{YuXajw)&)f239-v^eCjR8ZfZ{#00EDcND^?i5F5Cf*{$g?v$ zd3lS506*#s1d4At3n^53#Y?GDGz;UGl1!M+TQC42#LC4LYr&CPZ}aId_*M6q>3M6Q zFFd+mJ_J@77ypEQh2L&599K$D2vKxeA5*7vyFk$oc_-HNPouc{Vm(BB(H6V;M(!=C z=iU-4OfG-$>no%sM%yj#=YyG~@z!$38)+^8t<=KA#78nbvEuTlQ zM)Sa3Ej8W@tGxC6W1K*uJ21Bm5CiPMjYwad{aq4hl;sw#^U*w{av}tC#>Ix9Sh0qf zAM(xr6<0EJUGj+@GmBU>x-mAcU@1=X@Ov_9CGNG;h|KTS;UgD=zr^lahdm9`LxcHo zM^xAhi1B1U_`zZN9tg+acE)4jX#l#Xc~AD7hHpzLc16G<;T9Acp{g_JC%Bwex7h_` zOHja)hy2pc9WJlSJU0bY`NEb2DH$fhw2&A*ZZJ9P*q?M|U57nhV& z(%KsQQylefrLP@gx6)AnL@)k#0{sKJ*NCR3r{JsA)5b^~3Y88?3|2Cl_Q6;e!-aBhl8+&Rf^J6Wry%vrvpqzfwi=kZ7L*aeNg%A!4tP{3M7R=dbF|G+;@W zfoE8FF;)^BvS2KyJETiJPM@lT0gS2cPs;CnwaqYpjLmom*Z9c1C43WT; zb#uMVK0vQ#D04o_un~_u5{gOhY{)+EjngJp0`D*nFhU3S84eK@L7FuHd^0{^CO{2C zL2V;;%fbd2Xu#SONG4MAZ4tH00j6t7xNZbswu}L03k9pWs0Dl&Muo68N}P#g?ilk6 z6+-P(IOC^32fKu9-*VQ92(Ch?KwinlwZFe*nQC!%xfk%DR3tTt#)~soDF)L~LVhcq zJu z>b;T0g$_@%$mV_G3e*+N=5s~-*k>5t3r~cD4t(jbR~FV~iJ=R|{KE#T#Na4ne;>i3 zN`g;O)%vkWLsP&UgeWOJ!3Ke+qyJ6gQ3i6?qZKffjztzZQflzm^9gaybqMkKEM#%U z^%f(Qsk}erOo=>_6S0N>^UPz2?Wo~!&n6xAD*Il%kJw{x2|wH8*mbS_7FDy#3~;V% zR$VY>O(14nFTIaYe<7N$Qe<$rMDlyjXG2u?~gae4NuU}RF%FeNWGX3|#GKj9?v zlQ6!QwNhd^Qt^vavc#VWFIJc7YO^{rxT@BoozwNOrHJ=e=)1GvA1$lQ*#FCDB*nCz z1LF{Nm!Y0zE~^I3S%zFOTbIRY=Rt40F`eh|HeBr2k9aP1hCq2Pr0KT8F<{FuWF>Sg zpbl07&a`T|e z50bQ{1|8t>vGF-aJ6-NWK6<>~N$rAxO=;Ddkd05LP_mJ* zgzqtlyouWH4RMUYSd>mtn;ozImPnZcRV*Q`8TrtG6git5nU4A;gVPho8Y%?i9M{Qs z2K>WgCC*6uwV>ym!ETq06Zh=6EpjYvvd;iSHoOeo9q6gxv)ieCe(|(uM2R73;{&2f zg5;_|P-w$vI(VB}&ys9*%smQ3v)WLCJ*yYbWa;=)A}5_-tF=zy6~)+wIdin@c2M#Z znQ^4RTzc@0JEt{AxVbMk!`bdpuHtFWM++TYx{IQt)C3m_dHd0ZhRz+V{6XgO5 zxtO-0FepWhGDy{Ea75x^MCz4bed6L`k;oR%^nEv;gl^iW=nKSW--!9x+zY1}a_PrS z=$@|5ANhohq60MroM-m~5{`cLM@)!*fY|IEI6)3=``y*OCu>dykHr%U*+)S2+j6j` zTxlgMsT~aQY0K3+DDhV%ulxi$8i$yaUGSRP$M-V_Frq zTqA%%76b#yFh-RkR9txsR|7Vn$NTvFUqu(}SluQM4#QZW zA)eBl&V%Z|t33a$dCIR00ukT@O2;1Y3w$h8^mc#9Ki#NCy%C>;`*l1lm(B<|`S>y? 
z?^6u+Kn~O9tct&ZVTIL&yJS=)?~&wTVmnUUN$T*8h&It-t|!sqOzag4c1~O&hcf7$ zBHW(3=;}(=>eo`)FW}Z4DvmXsn1TXFnn;Km3Xud)BB2#yp~25v6kiy)e~{iKhtICAEU-)}lOA&9n4_+n%_2VU(8l}`{14ml;FMbgi|1Sb zX^-a7@!W`WbUG5V zprq&5`Rl|N--j>c2stiHnaDA^Ci2$SiAqk)8N~E_&Ee%#(4a0%_n>&&+n{((daPkn zBc?U?EsWb!Z$dr%-%j7>Q$Rm+iPYyF=pAk4@vBi4zaQY83q-wiJ=w&jhi{ecntusU ze%WRFZ8mAr@)RXO>anQ=_w5brAFPr2o#GL>YSMT@~FOxLSV4ot_%I zV2TTn1Zw-<+84$E8HvxnD13Z=VHp|t;zgl9tRSC%-Q>xuq4ar~^{L#GOJm?7@80$O zCC-P(kt)i0Uz&L+R((l|-0WUoA*JJ2u@2b55JdS|8J`*iRO%JOg!h$EuIU0 zS{_jSgdeEhECi#xcs%5ZNvF3<=*3>CpG7=iHU$)FF3+bJJ7;qJZy1KE)Qp7(OMLS= z{$|P+maF8oKOj|t@EXR+()TYn%U1Bjf?Q-9&LKQDa_r8}yf?yie=zp24}2tph%(F< z4733A9+rs2-vb+mzgHrEIrE@dNE6+2Al$7t3LS%ua{Z6TX4YRJ&CJcwqXt0Sf>DV> zV12*WvDSgO*tejd5%|@levSwVgxxe}G5#(I9#&kpB|KQD0M)msCMW^U@qk}N@~)X| z*rnacf0j%cvc(3wki`a>2^D~G96sP^dW#HmnSXZ)`c{x{%<8-a9maRQ?rq;gFaJ5A z-{cdFgz4PH;nowu`D5D3lPg#68_MFo-qRfOdyuPQq{z1>LHZpWb-XT^%C!)Atd&zxP3jJbL7<~CgHda zu5~fW%9R{<`#s0UCj(8jW*Y`0OJ8r&;A4;bi6%UH*=H~E;wfhmv6~vTYDD4+&W8!K z54;eyX7x_~i>n*zH-|!zB+Dy-UqO}9yl6QbxlL&z#p5*{WSU&ztqyKve$;1h3GvLki95`f9KyQR0=kdFmWSd-@-o^K+7!Hs{3WD zs5U<>*Um}cSc#s0Ds7Z0(g~_L%sOt=)u<=VWdS;n0mC+X)db4f{gy|cim5!lWR9EV z4w7Q>omG)q29WdC$$OSX5o_9m3$7d?-apH<`tMq8VGaO-*e!OmzZv1ecZbPuSrJa&H4Vd_5XFI$F6kp@Xj)45^P5 zUC|wXKqcgNc6}F-M4n+N6d=Zq95A^dl>bOL6Sqz&91#A*je3xbjErXbybP9*>m5Mq;%?<=co+~R|^fngciNMPq-+S z=rODL4NDU~P*05dymICxqW;&<$>g~*IYN$=@YH@$t)GE4f8Av#HwGfh5wVGNaUHxl ztnL@4&pli&uNRp+&4ldC2!zR`x1U9XigV}@_BF^J+zb+V!0pugy_0xF92!SRj{eOl zlrTivA515!1X4t>w;%0}A2Q8!KbpuRW^cOJ8}@~I-l02ht%(jdJ%0xhG024sM{KB+ z^t&NVEiy_(X^&%XBrQ1x53j~=D6DF6K6Bmc3ryXmIfsVzOWtDoKZ~T^ApWxs=(or5 zt9%%e|LQ*;35n|;Gi$%}-`4b?m;n}kLJ8AWL}h~>LKu`OHUm6Fj?V|GOh0xtw06QI z)UM#<-M@3Inzj0*sfG)HVg`|&w|F2FzgF0#4$Uvu?o1J{22Cc5bauoq9s|st4`oCw zF`5z6$uKKjzcY)a+^JF$kLHYN^~DbuE9}58d4jhl{W?6}OQ-Hei+=jyWWZiP9(`zp z5*<3#|JgS#2UD1z#&1~;13dfCPyKn8(xZc7mohJNZ4Nq2NKhM@u&+=^1Zk@;4y~xN z5=LMuz*LKc!#4VA%~*9lO7Lwg&)_tZZhTx)>D%^a1`=7>)Ln%ANnuv}L1Q%A9^lil z_5vuK;>C5<`kcHhRjYR;O(hb4ID}1X)O~VPQr;iXi~ 
z87Mp2l+tpfVaM~`W+)v&FEf@oFLuEM&y ze3v&qVPj&d@EJmrk-)vyEbywKFmDATC6WQ<<`#G(q~)KLH5u4J4TTkVGElX4jGJzt zri3^og3*(bHk%DR&fE#ivTCc6No;GVWDT`S07&usUq5OHB`q!K4}KvmTwDNnyn+G1 zpmReNg9iDSKslYN!0S>KkMAuaRz?`Zm1LCNUpbG~e*xl`sDcQ8H4X#T2D8{z=3#<% z?(YutdR-ei<0&BvX}<69>S{H>)7~ykJ~_I2zyN}vHhA*R%-YQ31zPlKQi^0L7&>5 zXCQEJAE1F+gFig~aG&4q2P2A0=D{*C^WS5fMiiicO|&pkgp)&3=+6Xl6 zb!BomtE!bh+}i?`bn&bWc1Dw^4*Q-7;Tq-xC}OfhY3%I+ZNF}J0Q@DfrTYgxY7Me> zkY^aZHNjQ-dqvrne;#KA9AJ$S%3{Nro+i(JSS3*XIvc0IhXK4gAUYm`gy!q(+tf+Q zikl0~il=8Lrgl*CMIn%0Rp;o=$ijo@cp(|00kL@di6tBco)I2&6#YG{;>T&e*L*P0po{)|wSi_=w@S@q$bGyGwe^_rkm2AJg zb>vuYfV^hoqxzKpw4hTvc`%==!Wa=0`e+3Jql7P59H)?b1sdX7&S%eL3U|5U=8M21 zEaKBI%WG=cBaYAL(ijK8Tp)*u>`uV{^#U}Wb#5-E3eZ?*bCvY#EEyV$KU=Ar+d7vO z2H($D0*MtDxPiAo)Xqft>Z^*1`WB`{#zPK7k~zna`MBn%xP@1I#v1RpPq4+uaeBwR z8?e;m3z|+82sCUVP31R2WbZAHg?Ci%aQT#XRFuDN2bFjh&|rAvLQeo0KQ(v77ng~j zHG)N0%7QKX8ZNXLNUM!y)=pB9_7hh6+t+=C|u)3e4f~6a?ApR$|v1}=sH!K6Dr4Pq)F{lA>3MXp!2nTY3%!Jq zO%=_NQD5LOZ9wThoOY>iLqow~J1_aoc}R*}Xr@bxml>!hl)Re#l~TwS)FA_0I*vJw zqJqv3FF~T$xum`Fi#4pHFwSpG5YxhV!R7GzG>G=#MJ)YjpxwY~>>k)A$1Q&j>W1E6|9gMhbHjcsg@ z!JSXlRbc7&I1Xo;+B6|JZ@E~M)$|;LNro~JQ8-Qm%<+p!1UsMyK*Nx!Tz{nET(@Uq zG1D_qgUorq)I-Rt^jr~{Xa$ydZOy7$WFppRgwnuoYJR|yXV!X67SUKdQcdhffY(HT zr~mhy;&Bk9qS-bWrpbUBDK_am`mZr~UTxt%p|BhhcvsBaos?n*Z>i->tAWUI2aeQr zXaB|;JvU;rxyILOU2*2Zzq|5DmD97l&7S$1bCpTUMdp&&VVkI-PiLukA2_ofuE2q` zrLv9`S4+a`1AWS7H*DTOV8zwb0)XFhU_9p-{BA0n<-BUgT#%_n|4$w##>V55P9NhT{`1U$er_RKWI@2#v?5@?M7 z%YQ9qoNNJP(yy`NC=f2bZYvRl)wHFj z2*{I9MU;TEuwk5=cg9!9vb=iRo=;INNrZ;O_8z?Z`hGC8&sw46u>qsg#n~&<+`nN!gybqi+4-v+q2O@JsJjuV#d4BGDhz4!7hG1!;YF|CehAAnRh;1&zLg!-l zuD3cDc#*WyN<$gdwfw{wy_F3SGi<+km9DI!3+QgJ@ZO;*K~_VdHZTD&Ck2B14BVx( zbrkVvX0jtnJnw$_%h)?6d-Oa|l1Mu>Hn^yu-EB2x`sbE{29$`Cc(g7|Bc}HH_nHPv zZe2f=-CFB!c~u?};qVoZpLw*1J}F5Pp4~J$pH7=*4E4ouZwC3=iz!pmM8Zg8^jryO zrZ)bnn~ZgkrwwrWCT1!LMLSw@O;ExM3fg1u)VQpQHi*(O9d&ARGofLB!@T4I8h7%|hbls%u4%Ubc~sh3wsRun zFAI^!OmmBgFfSW_z2wY(b1FAF{p5ql}PK^1|pw+ z7*lEC69@)gwOGsYe%SuWB=tBy{*?2?NYw|NaO5~S6Tz@`k2{_f|2urT(RL*)$jI@} 
z2_AFynrU2{S_Szo%4+g7Sd#hn!@&DVFr=h>G5VbzpoRwJzqSa7m?m+mPgC8@X6V1QLW-o<+ad0?DB@;eKYGH;1{{7LyiC5@rM8&92+ zKTI@c#!OKhenzAcmWh;gkgFT4mf-RT&43$vnO{k}>!`~L1n=&9yzL|m5IJPNR5n}% z$}FNGvzreEN_bCbKA{mxOE#i9atQNZBR{2P47zxH;OP5aNYhVCu$&K}eTbAf& zT~Tjl;Q@8GrjC|JC7S3lJ}G&=9+t!74~!9w%0NPFWx#7@29i1DTO#fiBYMP!kk!Uh z=Iq9sLKb!#UNLfdT+DyG@(aR-m%&w=hv?p%ceGinMg5csqlMG$i2Pf)>pSEW-9&xsx|lW1EFv(8=S9vI$V#8he{uIm?YwlRVG#GX$7jpP6cC_@jd zf})chfwyX`a4NGL+QS6Ets@3*9l(1wJp(M^GC95TR!_wa&w?+`0n_#86jP4%4oE!- zxu>PcGa=w#FtYI>$kYf{Ok!c<(_+XH8;mRtMX?5+v-iGV`OM$LhD;-K$3IeT>V9WQ zO|2^3b{^= zzMGynL}TA~@Zj4;=fZXV+}+=8JWu-Q;#OECCW<>r_)8d?tx24ji-d=d4}RG=v^~Fu z1eS~<9 zBbSt#v#?_ZP+nbO!C)2~87(_Zz*msb3{P0(NSGyN?0w11vSX33?0;Tzz`V9{~;bg zfdHfz4z=JZ>Vkgi9j7;o8QT`uM%g%94z`$#3@vfvx=MZo$nJn3VmmyItHk4gzEWki zE$B3|Z-?TzmAll#Y;oY{*Bl`ELFSt0dr6d)!>gee;1w$p4ShGXm`;YXM9xgBsJ6`b zsPm`7N=V3uKnJc7Tjr{1kYIt?g9*(O^ zB!C|A{I44>3tJY|ZWUN>j-Vv4sM;aUyuY<8QRHrvew+_*0DCO(KjHq1#m0&&^s>uw*#lGv3f^{6dXbADV{Vg&9a4J0Z(R3WHQ?U`OIRVplqQ-lv) zt7~5{;1PypAx+6%14&9$MIvc4pB9m%=+x~u)YB%i30 zE*o!CHcdUcY?&2MI`P{1P~}thg%m$*?2UCpXr_J`IxigQU0>77{;= z#amEoW!*$<%81IzH8J)d@mBQZ8~rL@i6xfqsRG~Kr3sTM4*4|Zz8pZ0r~1MYKLNP}&hKfr4x z@w#bBr-n>p;&mV8(&N*Un`8{Vujf`r_(!l~dAFg5OQ9x4c_p9CCs2eqIzV(u$nV=s z{4{Ykq_%I`Hzx$PTKbQm1>PW;UfDo|nby9twr=Qpr)bi&%(&n&ksSV&&c_i+hQe~* zh2^L|Deu}68359>`le;FAtS@0M-5$EW=WxEB11!13}WVHgl4%W`Ry+1r0cjc`UD1! 
z*D>rz>hAYSB;N1MRuuoWG>(VcGg0;+UU8f(sRW0dur?aKbKD=uSF@qT7ED@~nar4o z5JT#~MN_p+<&^X{-x;o!TKtG7Z%_kEXjnKxs;8K#PP+zJnlh5pAiF(i0yP;TJs48e z9xx$@_y3oxV@%pMo-CvN_Te$I`B7s78lc$@qV30WiXtydk;sR&F;0s-G7Q_ng}P50bJg zjcwpSQ;E$QXyHQ#^#F&A?JI*8c_hX51+SodiO&RmO-GRXhP!-)D*Hbod=ybEo?4|8AJa5=kpB>|#iBee>@FgFDdx0Fiz8uW#-eS%w2Y zv0pSHgtBNyr&BgUaq#WV{)yGH%!&uym|f*wx}cKL4JI3yfwq(T;Vd6-W2<($Nx#ys zlMZlx-#!0ex3<={3h(#8G}9z=N>r{mA%|~BZv_A-08)9u1b?_##lO9y22+aPyUtC1 zq<#(>jdMM)h#ERvyij`oCu}=wI=_u+Yo;>43&j1l3n?nmL zc=`xeqqfm0bN-c?y48XFtYU^+D%YIH=l_XGhoGlYnJcxtS~<0*(o&je&ELRTaRGRMB>^O$8h8nG7mOrhr&lJpxNKJYDLF&5HT}4a^dZU%7)0D3#J}11@mO)0 z*87<^_IrnNI6h>S>;+XKVErGe-U28K=ldR}8|jc1>5`Q0mKG3@?k?$WkS^&4sfTXq zR6syLx;vzi?sxI``Of^`8OL!P6n3B8yXT&B?!CQ4NHR2H;P8`TL>#6ZwBvoG<={%7Uviwz)c9(MQ`k zi1-&Lgm1@VZG42q8ky0+1w-I?MT)Mte(00sX&Et)*QORg;h;=K7Li|Om0Upf&3YmF zddK_{Pgl&v$5v8GX_v?G1v;^{@}4SqlRm>lI`9O>^j57MH2Wfvpr+>mNuDqiW|V!E z7eBCTeR79jv2l(}X}G=X=G9{RAcvQJ2U@X}DI`<*U zT9rhHJuaOzDr4GVkua!0QGB`6r^9$Ryi!8T>VI|Rmeyq6dz}cA`s6!nbN?}FJ5e&* zdqLzp|Qme?vOF-DVj!zj;eOD=N_e0niI8iE*~5L0?2oYv>)Ag8ux` z8XlHdouDd-KmvGL^}+T^7A^F+feU|}da|ASATE7GxC{K@Z7Z4iV zD6Z8}$k;TCL(Aw~A@{pe3>KR>@r1X41Y;MVw`*WgM7AmFvDe!*9u7Wcy6+K?r}no6 zgXz8~mbQb5%?_`sm~S}W+P;~7_9)b4GukP;x15p)=tVg(hD5zF5$h(=$Pad+d6wou zWhV81>x|>lf@|F}MUe=Y8p>j#@>fb)y@+U0LKl{v+)Vfa({!nW%siKGP-_sNM^D$reb`0O9^TJbJF$5$G77(Ny2GO&3JPQ-YlfhQq*Bp+ltenYb zk(`q=tG~Cf3PRkx^$;r{KQkw}D#C;Ajw||Y^_$7GiL4w=4bVU?mVEJ-R%O*$5?$Q9 zf?X~b;HIo4CrHT&)g|uyyitlohs@91;fBJ-#>QC%L0S5Uy8h2#_|fIJ^hgW<##*?)KhYhQ3(G1+&)H5wn&iQlv6p9^?ed>R3VwfNb2~gRc5UxfcK@1`y{gAyI`% z&--0uGztL@RLQlA&749}kk~Sh@82iQwhYF0=jGa6mm?7pk@as*MlHakb&(i4zS(AVQq(^7 z3%Ye6o$?QysWpW#AK#uy4wCi`2s1RuE*jaQRjHhv`W3G`p+jPt@0vdj%Rx{??E#b_11-Suw5Idm@yTVgKc(Sn^JNZNynC< z#CWChL41NnrEbi9zpopOhPHS2_C8-5)`a6`|If(CiM0`Of^__Kz33&3MI#M2vU)V0 zerpx9G(UJ-3O@I5du^Honfn>5Ji9SJp(3LvmZ_jiEuFvyno@Q z<&A`bk^zN_F`Y!}-shj6T)JZ*tg-OJtd)6Xc;bOlp7@UT9vg$3JES*;po`Qf-J) zm{>HqEbD@Oz@xyo+EKeO=8f6*(Ll~ty6l?@jqOG z&IH}K3DA)|cbFxEDN!*c+ul*o$n3%KdWu6 
zNvYytd&hX(H<@5|5W9hnNF!@d%>4HgkZQiGaUqX!hRARtyu}x1+SVG=rz4$TK41-( z3%wYYN7!HvNgvxR#hE-6)7J)*Y!Ap(i)A-$|E()C=!Q@gk+J38wd(ZGdZ?K0$|2=O zcrA?=NG6{Nlq0BD*6e6}dsaJg^;XkK>p!~)Av4K7U?NQaIp23p&8emiC!zKpy=v9jL#}UW2FRT14@4 zbVf|ZC8O(_5F7L{b$KO~e$Dx#x%1VLDu&9|cEZ zJc<2ggrcg{4otA20Ci-0FmI6q!MVx4`31EQFV`OhNxAjafGWGb^mi_US62)$ zX0KHwjM@5(5p)x~d7ojFJm8UyY|My4isH|E%{DZ0>7o=y8cacj-HlA#e56MVmXqP9 z7ez7F@m>+L9Y3@&>AF0&!oH)1r=s%Xfb4OJ!X$$OXp3ktFUp!v+MvG*rbbI(MgG9j zhi=?HPthpnMm^nYN>diRZ5T;K3hA!nGzkyWz*q!2p8D5m(gG;cc|T@K{G zaeJ_n;BzAi`EJ4Y-QQC*Sj;f<_5ctFaQ(jxw1_w*yAz9oK4f)bn^M2CQZ|v~NZy4? za)qrJ1EHT#Wn*z*^BhQ3>s#DAYFr$uv#S0(0f*Z?sKaGq;&oX80zh&Z8IA)V3BLva zOM6jefY&)}u_8f&i=c5`Z1bpN783ai$MAeMpWSfJ~0b8Gu z=S+qUZw4T_Hn>n+omk~^O~U(u=_&7q|H{aS9WC3~g)5pSErLjEv)c)cuz<_6Q9{8; zp4ok_aKr3Z5bnCAXxsU1afzNb08rg$OoY8V{kc}#N|&M572K)?x!{e0<$uc}qf?X* zOrMjRQF(krQN!MsA+ns5Y`(597k;48u;D%zy`YShxpbber;oAWyN$k8+}6w zMKEs=M@w~#TG>_C#pRoRbj5SxNN_;N>nXovz+!`Fs1A41YsTL3`PUpC{3*X9Xk@w# z5)+!B+}Byn)z3F)++uq#z}fvU%UH9ik+Up@mn8_J>!>d}%+*HzcB84ok)N;Cr8dIQ zqFoM4(sb&A9uH(HSxW@vZVk=O2Qbse^_U=B&fYCbE4O2}p@2n3z4+L|R@d=ZNIs-6 z82puM!d8~#V7{5lSRFmL%wJ7?IJL`rkHD#-#RHjE=iZB#A&(xLGXwFht_vU^P-Ec1 z6UBFG{buYej;`hXrE=|$DkK})4QTK{F@why5T|d=fv$&Ytm0(IILsbPva)-V@*+xKUAFW24y(jPkJPte2mf=fTL&lqW5ivgr@TH}uErHEL35fh1>-sQ@hwnX08bQCTACI`wlKGpB2gYfTT?7D3M>5A zT0pDB*y>JwCPm+Ou5_^J@OjhUpc}EsZ{P zGLkC+SYaGT^QQxoH!VV){(srufFMP68Yj`AJeW?SNwtG<^v$#zQE1W~_0nfH+O}!> z&r+m7Go!W6axK4YE@nE~#&?Za12Y;J3`x;so-fxXpJ7l{HswdjWj8617Ig?p4*DIG z*0~d@$Xf9!X7R=7a1plhPGvUmEu`otrNcV9Zsf#2OBK+aS)RE2m-XU&bN7JrAqX4U z38fuZL$gLznJOntAA!xNY-wzucI>qMXBA+Ty?>7!LCH;^2y%=o+cpf2crBZ!p9y|Z;j^ah@Z ziP)5tU4M$&I4XRB0!A`$?*S?g(g*^$Pbyp~v?$_VqQ3VIDJ={*l0DugrOv`!aok2% zUq6|wdwo(}0t6F+N?qmTZVcXN3+`U7q&iOQMiV?MEi7^^4JWTLHb;1jQn+!pc~>2p zMd;7@TrFAgd|Aq{6)$(W4-!Cn`!VTE$Rr*F82fr!*;iA_`oR7|)ONaz?Cj@@yn&;- z?F2l|hBbhH(tX9`kUH)KW>Yh3)W8OaK%Qh&7=bO~8De+^WJgMjb0Io8jwaUr2 zu?iLaF*7|qFZBZ!9C7VuQ1$$9!Lt!6#zqZ?#Afl?Ll7HnD|$C#mqM%jY#E8(UOSfh 
z;YurA8o8Um1ZnhlvvNWhI01YD;_ZaQmuo7EbU5|#2T9Es*~6czNVjvq{V49^BDE9@ zOc6YOYYdd}ghn-s^LgX~?^%7h&S37r1gJR~Gzidol5$Ha+w@e9AN%$rh(e3%j=miAW4 zIFN(IOyDfwnno`SrhheI$gmUBlN4bBa5v0#_gR8OgP*`AWeH8gl%iCj!;W3O=A=@w zA7%c_^jxH|Td36R{QDiV(uM~<#>z^ZdP-d^Mh-Yz2&4PdAJMG8)Ivig2+J>IkRpYd zLqlFPB5ubea@Wqob_v&<3V=W7-c=qNRFah zI)@f?{+kos>vDF6BH@dODPRNs1?f4Ea#D658EyC2x4B_Y&3s{jkOA}c_|03>Wv#rz2PPJP4RiB4JY}EA8P`vF@@q5|A?c0Wu^;V) zR4KY^zMHrc0lw!opp215rMiDR&({{)Fbb4#gFm%>*Yy~X3gm=czy1PKG; zGI4QcydT&-&Y$rztVO!0yBX2<>QCJBX;dHMX~tYyt*W>jt3hw9g?B&SzsvPp6YyxO za2hV$LlB$(<}Du%7Gv#QFx08Y!9;*}pf-j3zkmhIX@{%bL5;o$VvVLC@n*ty{(5$T z6w>o7#p|eUX}=dV_}w$*`h!3ZlVVeHt4_MVY|3)`PceWp^nsi#$9 zrn2wm1gwhH*#*Y5kR3OCrrik+Xrxq0CR)OGL--e}oPom9Yh_k8N*2jTc8l_G+%U6A z-wabA2oByuc487Hjoi<8Gu`b@G>-4cj%Fiqy&nwe?zzMUfD;JD0Qa0ecfGucl15Gj zsou7+=s)gX&*zS?vVwk8MldcS3&W--PpNZK9aDzJvV63)vXPRD&5`)Bhqd_&HJM*> zAp`+REYVO`7iToa`})v2C%ylaP>-p)*-{DzOZ1douv9oc?I-{|~bWDFka^<q=c4(enYU|q$DoetAig5fUmKpG#s=5&hu zA8MM_g@MPcW!8?HUP6T6g@z;vqYe=XKO`9VSiMfrOnSzxqh;S7E#LCe0thSJmf!8`IwuS<@&?Yw8Z>073fGF9X-7{U(tO}ripYh z{ozjWwB759<~#Qbd2CxQ0!3M?#RSd_`DaQ=1#F9V&i&xV(e#B6sy}%~}~h&jxl)T(zJ14HZ-0oXfk`C1czMmd-}yLA2IBDLEN?0 zqKnr>TmJFlMB30o3xZFg? 
z25X<0sOF=_8q0=X)UE1VmfIir)3gSk>76jAonLdezQH!HO-3T%KczFQf!Y<2Oh#cV zOyj)QO5I0DipRq8qCvdoPqq0$7b5i3Ve1vhdHYpUCi;O95U_NriQ-xZ!s_sD{uI>%QIj zXrQ7{6t{6hX^n)H`pi^%#l_g{9~{(QaDdC^g*ggN)eDu))jR>ZH|n``e6j_eIN2nT zWr)c=(Jtz{g2KTvoDr#pxz>U{KnK2Du&)*>oP#%>L5ak@?Y#f~I?^*6TpYS68F?u2 ziCjyfFa1H*Wi#tnr|U!p3Ap}?B0veGcRHNHvlJ00 zsmHxwW>LrFz*a@1*Mp2Zqh8jQ$R(8XrU9;OP;#S*mMgAFl`JQL=OBzjmPJg0$q@zc zNd)t4PLTYed!2%UOZ5&eE^6jK(JUF_W;YW!N&@i&`rQ8y=LAeDc0p-qYWk|61`}lA z0Pz4b#>~4()<09QX}N%4m|Am`$Oo(jTVi$1dLAip1-D!8y2qNx0hiEgs8wSAruAPY zwe-f`?%xobmOaDb&nHe&muj88uQ15_xiegiEG!7qA^G)vVSj7idshp~hTfo|Ob~aW zT_T1i3=#na?~MNL!_J4c_G2oBq4rgtA532PbnS)r+~lq30UfUrznwF{fC7P3)_JpC z_`#td>0dOZRy~Q86coA{R4l=u&z2fYAG!tHNnKyGZmVk{4Ru+>`1rV5tyz_LA}NT) zubvN#hPlq6=E7(FkscW>0yka(yZWQp!)XZ?@-3xj=QR9(L=``XKb+8rqiiKw8dfEU zCKl;=WHNtCWvj>r+*j#Q;r+x8{sP5S@TK$XxngTHvRLPTA z4B7!BXKzTwF6MtXC(tA-vX6eiC2J*{ix?f%K^W!uVN6&Z3S(aLsjE0U?F_Oe*`{eC z+6aoqLc6{5`+om1o-sq;JWTFyYG}|JV)?PEQ;Q&?HypB*Fq!I<1uV97He3X*8TpLS zSXU6xldt%QZG`2O73aREbyI1zHs}%dUr|*vY;d_O&cO%uWN;Xyn!^CWXrczVx3&m> z@S2QkqMdRUDnp$t*)N&4H}@J6SG`FpIi2~kq4cC`*Q^iHnDEh16~8kYnFOTloi6rp zMeQ^4Du~|wls;#{h6Qc*!<5|i16pJ+e8*t=N`;?;qApH4+H0c7S42}uTyvOQfPxwA zbVj01FE8tlYCLC>%b=M>1|FZ=3v0)sVtrvmv@O*hH~J;}i{-~kDr#kShy|a>7PYJn zL+VPqSJOo$ei-6kFT>N)L1~Jd_2>;X%G56+@oYc$=gm@@tL1>bB^;K2C>N?dDB+^0 zCZ?x*!=zSDN%+B8?@9&`U_Ad3U?7KN)~W*~Ey3n5W%Bjbfvrk2Mz4PO^}ZyA2vx27 zrzLOz4x5(Z(&>2es0&tR2Zqb5%yeIV^gUOQp$xN;a}dj8)$=JPZd&riz?BT8LRs}7 zL+kC@ki|6qgLt2Zc+|ab-(kNTGn_xB0mG5L`zK3vTA2(ZzvkfK*~sI*RZ;L<4($mK z*fmg#b%v%Hs<`l{{Fi?C=_$PgrIW;lQRiV@CpZdNcmmPuiTFE9RW#vQvdO8({v7Sr z-lYNFU=qgR#?10^_Q#*KDZ^~wCiFg2e@W!38c}6gKZ!)%xHPznSvb(V61Mm81G8`5 z4>_MbXk(cw{L7M_b7fGx{S}$7quH8P(~vZuZI`A84L&`;_UN|0Pd3PcQ>C%iA>kpY z(;Y5p3*xe2EA5M%L1T==!952`H|}Si?g^;^>byIL^~wRYqp|$w$7L&-f01XNC*D9U zT)<8jYm51a(Y4>x*2;)+x}_QTugHkWTdB^DKh<4W#%BHuCl{U zR@eA>zIPkkU;4gj4hNQTi)o>)Zl08Xh0lj3w%Vs`E}vT6&#oG^wGz#iJYjw@xG7+A zZKpSYnPA$jcg%4A!>NFua|2a(mVryr7u;*Hop-i6yT!oB6Tl?nSyCE=G7YRfTocNzvvH>z}xVjjj)bmwh|? 
zb+w5cx?y4pc_)HAHRwQyI3`}} z_rAiiR`@SpgL}fa@eZk)3J#w~!$#D}$!Sl(-f7@gDE2eBgKo?7&TrV8{-0!f5jGS} z^ivo3*eBKY6D=o-Dt0@s=^2JBluvmqFfe1Q&**ZoarEgll&jej zxhrvuy0R<{>Mr_9rUmy+IEn8`3jZ@4L#-n1X_(qNVGxsg<9;%rb&e6@Sn*_5ewNZ( z?D4N0r&Jji8PvuZzN~{Ye-*N*fUGzM&82qefFml= z(zc=-U6?NPnfm?>Nbm-Toabd!~)d501pC;H& zerKW}@r_7q;?CQ{S3n34wD&P4=3xPBQ4bQz$Ld;UA|I*W(=S$a7*y&W8i}i~{JhEC zmVIG6cqU@Kj(l;yI0(XMz_mS`n%0x=O758ZJQw?boB${sAi-x^Uh+e*wa$Ccmmo z)qgX5qNG-qi^Y0{_rcc|)JXjiK7u*x&PMJum9w%OH!~h?+%(K4QKGg2VZ*W?lgrY# zz%>D%I_NqJm+XIgPT@9_)kO1<*D@bFCj<2_$r`ET9_l7AXEE&0x}dwDS8k_Nbj|AT zh;7O9lw9hX-4LbYj0UAbD(e(O0-B)+qczu$M|Z$o-tSoHHa2Vxbw7LEMBcJ^;hvlk zJxUlPwV^-1Ts1TGVRh7XB@866XC2}67)0Fm5|WZHIj2zdRzZVc;bq@r*!!aiA@_D{ zlV+66K9Om{@(rHP`A}NPxpeH{%c?X2@?|h3u>c91|3@;Mh(b%f_a7bZUPZ*HNEko> z%)^9@oI0q`#=mPrznGeBIQy{Zafd!g>*+>5ZEpcJ zRAT@#YRPU`3@gaYx>DzrG!p3AG1>Wu%?X++P$R)$$YQ@5N|2qVs|DpTuTBIsgG%y} zDXg;n*3?M^77dqQC>bfmk7#goweYb+$Cx^sj=&hx0`D;J)SP5!2zB4?w=1{jD~_Eo z@gyS*QEk6%us}U>PE8A^z*a~_+kYYd5VYa$=a+JmI_EInu;+@d1-tvn19W7j&GBY; zOhI^H*d=>CYp30vyNsbLTZf3RPb8u-aWGd$n?zSb%Az{e)v}F4Ny!XC_F6obU8>t5 z>trCkspAce4b;QBUY+Rdfo}K3lDXE(&eVg@X&(+43{oQ*lJxu!{?}*$No(URRT`>x z9z?HC5F0SSnlz!B!^U(NpL+YllnQRs-wSq|q1j_@T;PWE7hu?mg4_EnRk_S02W=3l zB6pufo%#W0bjJTjEBETasgwIwbit{}igF8vx;659H3jf92%3_x+XUI(J10zC_8tF$K7YFa6mrEV{aad}JK}|tj1=K+jpKPJpupDh zy|hrDfSSXK&mVY2BD zfLi*G>jn1#lpK_A_k3_D_m?rjt|2sbwu^Y3^1cM*uFD;-o}K?O#$BxSdW|qt?RtRp zUU_O?IQel$Tm5)zz5N9!Nl3V}E9BX66H6hY?&Je@b?)wjP2(9e-FrS!D zBrSNO_RZODi}e3;M_`fH$xybHrw@3STb@r+$FT#tDRlL%i^LQvziD3%*sm({@2W%4 zDoDxxs=7D8p7S+$)dL|jFFV)bC!JeBn#s%!<3Cfqv^_i&nA=cL>;~m;cR6og@6iD? 
zxTGzH=@kcb_m}|4yKl?-$gzmdMJolsCa9M~URbylCxHdJADE>|BB%7b3i;2h%!kRP z+>Sd8Avg#i#+Zs`JaTF#lz8lCxwc&#=`4!WosvtNMgFdzmX8DcJtjPUres<@p-Gzb zNXxT5z2+!AVc72+o|T3MC6GyZag7*bw49Xha!T9uuXcyueK1Mmj`k;Jx*|Wi#sbHlBsr)aSx!UJx zvLNs{ql>Br1M6}iC~-i1<_Q%<;YpjAa?4+mk%jb7&*Stbwk$J+fH1(3a2|9+x2jwo13ZCx${ zw}uP{0}Xz041jTC3X~q$-7Ochf52WUD5za_k!~>pp)b7f1^8Wk=57A&$NK_tT3>7E zrD2~@GV=>@8O%Rs0&IL%F>ZOq+T0BP^8%C~%K=5n9P$4gSI_KMZ{T(ha$mk-AC@%H zr%x&%w{j@iko$6L^3T`ecwhoW?tTA~=(zc1!oXZ*@Ic7G0Xdq{inO`IL}*oRc{xe? zUI@r(2NDc1hr5igr(M1+0SC>gkx)XerJT!HI_7V~&cJR%nj!nKjyr z=nOwpB|H~II?$CPxP0k_W{`8S7nVsN=a$kNac(tNg&z5LD#>L3WoWp%az<~?kL1k4 zOAEMJaz=G#vZ_0pnwqNic7O|f4glJ3#J~YSUPAfwDEB;*nU<86O#u66>h7~es&Izi z;Tzb9Bsx$K5aOgu7bN5yN8(;_PBtHpcKM)8lPlaK; z$nH&gJf+5C1{D1F*1OTwiC-$5HXS&ordxEUrT@PiHw$3EfU4y>^gCg`;JG=Z{Bgqd zNDJ?0V01s!DP+FPqPuakG!}BLx;dGJN~h@Ki`7gBr^*=usHhr?O9?8dD1;3IUVPOF z!?v4gp)+5V?g^^Z8g5xIU8Q%q-D^-Q9u~L_%eRz_{jGmq$<=Jr?sYKOX*%_ZAZQn z-e0;r=s7+4`HPYJ)@8YL0#NNFUyI$kIR6$}66vLdK}tZsvRY7D3lAjVj>ujpe{kJG zi~8*()kBiU5p97p^V$DdBnEm=IfBIXejF6hx9s0v*A^29rc~wcENUrF^6`5>KYKfDi~Ngp3x0YHK6``}%`l!xTKw>KB2ggu&)!U7wgN z(qVRQmU(rT^bJNaw1*cy(i4+(u-|!Y)+^9hizTjQe))hVZs!^w$Cy`MX#b0*^G-;$P4=zd4g0T8q|O@MR9dv1j|QzoK9 z^*O)9Ybw=LYt9EhW(V`ds0glmABb;wN=#ieDrY0Y*Bv+R$puCY8I&1{jRdAw#r15V z%_l^BiX3{XM+?yhRd+%9I$~3!Tr(3TNhKREL@RlH#o)g6oOA&D&Pi)|b^` zFDa;~@ZmC{WKBL9U}JB_^~D&|x_bAyQOVUX4>o?C-s*r{#_qAWG*Ru>$4s9+c;7oV zwMZA}BwdX) zb~SfFK5so(p&V4`PGX@P=ql;3A!QY}J?|eI$9}~xw6hnMj=_9ilU8g>!yZn=w|ILJ zF0h<}V}}h;Fm~ULth{%$^?Rq}s=7jC;K004-%bLLJPMS*;&e;{(!P3_-MQpKIfyL1 zFTP5c&m9K%Z3jXNTkR{pB2(((9El27F3qKxQr4WH!qvC)SwCbb3nkYIbL#oHf=PBB z)VEP2h&QtLq*fx_=8QR`dnHO38bjM;BdM+J#a@C%IgV-&EcDJ0-QY?>#vVylunYqE zga9kMV8D$>rZlT_z0Q6s#WqItXiUu5a{I)d&`$_ukh|P}sd%U{QLNOtnUrtCBEVWO zer)pd3%&P7m@E}`cQ1ypK>q09b`P3$=%GndF&M_;vHo&6hnpAz5s6n-r^;9LQ5JAv z?|6vS?^oCG!-Q-H%oOwQe5D zL?0L4n5c=J&jb|gINk&@dipR!opfX9M8+=HqM46oNz)OE`1&Hj-*^n5!egObubp`s z?0SCMOMr8X6Gey8@jFzvu;7Vwr!)+M8ze|1orLQ8GBolgYt~w@KbFx`pwxng|GN5z 
z{Gr@K+fx~Ttl?!Q^_UBH&)Qpoa~TccS6U6zu;q|$B;Z6N9|j%%qiztAjSX3q_4KkG zACYst+g_yd;uRiWSx_PPKxLrDifF2G#jG?<{mG%eB;2oTZhpzAQR(dYIT|#e%0Rv9 zwK(TrGw{34BiHNk15&yd+fgUZ2L;fF;)n z_-5IaQeJ-Q>HJNX!C4^yA`?fAe79V>f7yOWigmwAt3AF<_U8;yuPvAGILnI2fx7QZ z9g)>-`L}O-atP|S;gsJK+6z)%yQUu0vPR87{gA_T5sYIVR0&`3VnF#1~|L z@F$0x-(BB?3r>sQms}3q;PiI{H3Tgnl#W%AxBfo!6M~Xs>e?JsU2`RNcRow}Wg8Ld z6#v3IHiPPHmK}Q}q)w4XMMb4M8;QW9>gUg&wZteX#`hsnHY|%xwejAuG|EnfADq$u z7Iq!!8q;!@Y1E;xGnUXZG7cdL@WNX;M~C>*g0s~YLng?#{@-cZGU0A*cCcNdZ>#ab zpiyE7G~5NEZ$u^vkEd_-epNe4-8PeCKRj|eL_`&@Bmzw`nL^g}+o_#Gfs@}zh@a~V zU#{ruZl%T-1`dM$?>^7Oc5kIcD6>u;&#a$$IBvLTYz&O!fZi(*v#YB3ua$A;JVnGG z&1S4`Mvk+)^_Rv4OL$urZYhO}+CL%4+dd%O)p9jBpYQt~9%2dTH50=p;v0AlGPwgy zuEA}!xgIaqC?2^}?WzPFyY4X^qsuOHC_p4f6x{%kYvYtEOH~Eogp5s%@bT&ko^DVr zk?U+Y@PU=d=gdo~|z00a@KEeE-YVC@a(fXE^=s|H-$KAdC^at_lTW=ed;cBy4 zMLuBf#vl=hA{Rc98_wWI_0^w0Cszmo9fA8guBw!h{9ukrS`C!c-^r=f-u||M^}n^+ zkLmdnct*VU-4>CBXtv%>V)csX!PC*t{STB8lHu+7T#zrb+x-RY>0idG*JP31@~PFK zrz2Gr{Luzov0P8*x?n3@Nbk86dej-QugKiLgC@t>K9HmNP9ntnkwemP69YsFTen%a zYW&fTYVsuQSTRS1emuEl#GKJcWmbRxl`!M}veRvC(Ah+qza}EX$QglCFw>?|jvT4l z4?QzU11$P&^hU~eus7Q55yvX(mE;SA{^%d6e;0{YXw{kZC~CKBD)(;p7>|M=V(7HC z2L8M;Bca!_z&n>EWQ)Q;_CT7Rq0Q>{3k~jG5AGgNt7PAF5O5^}=-y$Dxpl3bd9WEz z!_>QF`!mVKNuUOfF_-_bdZBHc6dLh0^z0%Wrt5_b(8c|(#_eK#DoaUO#M^hzj`=ks z3ywoo*v~!%i`^h0c!Q0CQjR05!V z>}AOGS#zoqvK0IkYxeZZv^6jV754UrhQ2TYU5NW*5|O*}%Nq|NXT8tjADlOL2n>JL zV_QH_bZ1swTUSrN9gdCeeDCetofk+)^?FUpM6SqTH4>5t+lM{~J(UGj?YJTJB{^Af zmLUV3_^rW6I@g>&*|8{kasGLQ4He?QSUh&htWmDmks=3`+tx+`#znmTn;nyum|{|J zRX!E+5$amn=*h@wEr2-MmzLxJgZwQ4StpH^Y4n-4$PAajU04bQl8ECeE*y*yToCm2 zgOIxb%WitI%>d)M7s^<_#$wdf9?s`}oWZ0F|0WLNF0=dORL^FQXkmS4sf9Oo{QgzG z`vgnrFLLay4r{R20R+%n_Qmb=@f7~Z2cucNON5JyONo2_n(*W`CgHuc*6BFL+KAvC zXr9}uj*2eDvl!v`aP;7QUh^|5*X*Y}@29PZf!d%d_L7%SJDWx;-7U~Q#;VpZ4vhJjAYD)ba>4VavtIJ}fOrF+pOCpI;|L@AyG_&@mJ9^c6&3zS< z&(?wKTnSSiJC?sV@myj>gN>lXu)26HdGQ)wsY`xV!Fd7ljEEwyrsv z{L`sD@mXEp3v*$m8YAW++K>uPSa04Jd$hv=Nxdq&FS06AVr%*}&s0edL3*rYWM%gk 
z`tL~aumA|e6QEFHp7d_KoVQlNgO9hE)H7S)M0_Z+_vt|4_Bn8JKUo`jsjq)8YWOsw zUri(U^!bLsmkRN#4eQGJ{I3nowrig;7jy_@eLB_$>FFeie;f#myaED$n@PZ# z43deYYz;jAETuDMDhKx$OR!@rLU7RdcQDHH7ltj^ceWQ>mX=NU`PDX`%yRtxw~r^Y zI;n;VidoJ&LVhv2qHzSDdZN{o3mDiXWQivCq=CCioXJX~Ak)%f(1K-gLbiWF{liFf zPIS`nM~1eP+mBw-a90m^_|Gt*#z%w8?qRk<^Q@t2ZSTqlQ)ME^;L&+FvjOuaB+;uP z_~b^w0sXYWB0WNeDndfw1xG_*;?xsL1}q}RhH6ebYS~c>qRVlXYX=#UtmdP~X@Jig z?MTLHns3e{U2t*JIh&2Kx^q?b5PP(O260KcFBmC?A`@ydwNWBnVfNk+asY`Ticf$K zhXNOEHc88ue9?{|+xJMx{a8)orKvRItcC?HVmKJty43N-wOQMs&iCOqQ;!+~P*j;l z)sMehLv>F^t<=rBmk#6^tU*KwADbIiw)Qxa>;gu97s@%+V%N5`5ET{{YIgeiT)MWC zx;7a9z%}|X9mNcg#p8b`@^{V2-{${=+SUCI;v}2wQ<33z{06kuE0s| zHe+9O z#+M4#25&~Q3 z;ITmipw|B1yQ`CPpx5lo_E$_4E{tsv>OiU&c|wfRP|q1fEw-~+pVR)ni!|IX^Sx23 zE_pp2nWqVd(8b&v!y%40UF0W*BQu@BKYpCCI#=|+L(-kerTdv%Bvhyku!mxfWp%Nr z7XFyD`p8D77g5gbf$nt~|51;9wmZ#6V<`t``8T;m_Y-yN_uuE?{6t?UQoNjog0jCHn-) zHE*znqR*z0qudCR2;%n5q{EkD6?G$@lQ5 ztGoAl=6E^QBQnMCf_uiL>}%Rb5|O!`J`MX@^3#{255p#^^Lbj)n-R!%DWQL3bzR2! zb4u*G&K6?U9**Ax&1}dX9V0j^gyaukO4Mlg{;{rXfoWawCN8qfFD&FL7>%RT5|%i6 zJ4TXhJR}3=pz(!w&N>#pbaj!Hdbqj0*jIV~o}ESRNHvL5P~O8TKvyAAZX)}~Pye)$ zkh@0`5X}nbu4sD5T`#GwbZJQ|N=v)JT?X&W@(t zcU$D>b)#2vIy^ho|2-qbf4y=cC~68J5Qey~YpC2%6ab~wvaW*8fDsMhd+mOWW-GT> z)50mqf3O5?=Z>bQK?XNqw%E3Q<_`J9*x_{i^!=#mk~ax6pN@txh?S}8`@0KBfUf2< z){T8SvRw2TZBK1-e`%+8>LGA_Viv5uz}`{i-a<}(Yrnpn(XrUlW>x25{C!@6OW0NX zJaGX=%;9p;1C*vd z|2*|HO=VQ5@w@QV)|#U~r5sr&az>|GHp4$D+!`26LDV-W>q{#uf3&#lXTD1>7lndS zqY@@3{^U-+q4tFhe;OnPg>0h5rC%9d%~05WoJ?lcW;OnS26Bi`@=AsZ6@Tl!Z~h(} zBF;{67Vfscv#Re&XUJ}OzVH1QZwY=0{*`!P9ms+chC$ks@cVri^4n0_#cE+ztV}@y zQE78$8Cp`MM~c&S{^MRHGyQ0L@&(Isq1vHIzTZUnp#0m_-rH;=gYeG#g5=}0Q zWZCFAfSoZ~7~(&1kespFtF@|7>eEu^n5WGc85x-~-FkS4Qm2NEc9AI?dl&L{f4V~4 z!-Mxjm7X9aZ0#c)XHrtqY`vxGrCQ=aoEa&N6g_`&KH93FQEn2>-I7FRQ7yFVn-m3^ zG7CsaiHM{mJlNOJBu0(=wl;5xqMM6`3NWcW7(ktD80W4^+>dkpZ%HCmKhuYUeeH~Y zu~4L#HGgQ0wm>+Iw)fQ-i2MwGcZB#XI!Iax2at3_&xH9F)iGSGW0H!Y$3iZ;!Ks7} zpB;AZ3kL&T+6h0KuXZ^}o>ReJMxY_)5zkbHM 
zv#$pGV9A8ZV%t}mDGT{)q(|En2hR{;cIh}HP`+83uaJ2LlEsPssMir`sw=m4JL!~r zs@Gxs9(}Fc3WYz{H?g){WQ&zr2iF$7G?sJbodxC>{<|@<_NH^L{XJMbK z$dUY`MbR@lt@KxAPj>^et4dfPnf4M4%gy_>Oh+=+klLJ&L=~F1_OE8x|p=jvN6ZB2!Dp7SIx3Ln#&`Gbwno!{+s0Gb2Rq%Pyk^Fv zNOeszrmz)scmDZ{{OS`@ie2Z4_x*=9)l?2qR^cms$2k>!`ZyHG9dp${>y{?7^vlIJ% zbe?RGP6XC5?CBym6fyr z05Ns0v^cuBd;)L8+V!*EGceev%Ovo6VIC1|AX%z7wX;Dr5_d9#Tz-~Q>>w6+G@Rku zbG_!(PXB;^b9eVEdFLi@`ao5dP+D5LF~)|Tj<)Iz((-~Uwax^7B!*~TBblFeI;RWE zQq*5$FTqdzz1k^I?lb?i`0U3=37?-6T-VmV`exiC$}G!6liz+^>48pOWN?;XO8h!D?B*Ip@waAVz_KDaLUUhfLV<)~%@TTdtC!Kvv(-DV584 zZlbTcPeeNr=SxlugP;3EXH9cg6ft(hn@xooCpR<^+}XQJd~HhnE){LbfTopHPVt>BN7_a(v=#&-KwK2KZ7R_~d2J zF@99nyMhfpe_Nn_OSUn=vV#TGrDT#Veez1zk*pc}Ij2QGOwt>k7$faRE*FtI)t+*T z=#N2C^cP!7#l5n9jBjWRKPPk;;L``(Wu>>50t$6+oLQ|YB9JA!#XYa~WOQ8sYd?Qt zzIjmB)JP$FI4P`i))g%mYr|WSMt-d$VE_IT=S~-%0=yuV&aGXeY&Rz@;ZE#GCfh0F z8x}PKyL6nvwspn}`w3Lml3LH&g1eD$=fRKS&TJAD%1c&)6bH8lmiFHKpJ-)oX&B9vqpHOlzV8Vp=dg2O%@p5)C zhpp)uO1~=k{E38RG8jS$eI>jbN#Y|(*}AANspDJjGELk$YslBx|LU;xL|LG*uU8I% zK!}wSojhfzJ+!hgZX$C%kW)oQ( z6k0HhxQ;W($j;Gq z+a_oz0ag2wE=`aps7SlMf_}@&0U!e?np5RrmQZ|+?GwuP-qvOlW zZ^?8MAMHu#K_zYzSuc;WtL}4iR%;W7^nDqRf@SBF0?diQqy;qnA$QHo{tL@m*oviD zp4IO#iz3Rr6g2$Z`?Lv1&@?MPm%GRyloH+uT$zZ(%z(EvQNG=~ zlW%3HczS`n{Pbr-v&0_P6gL{&Zup~3DER*lNrL}M-+o7<7MmxMV~2D7VagM!K!m`f zL^N!%!Nv9(PzN*3UBky)`VXXCjDES*l7qZAZQpOsc`*ULOh_@j)CnMuNZ3NUV&_ms zNN=bE+pSz7)BQ66w@y2m+m*`iT?bKp;GA$?Tcce{Hiu z=_5a_(R0C{_A^=>uin%=zokE> z9axMoF7WAJ5x#ne*EX-u>%J+iYmeu(7L z1CaooUm{87zHrjt+nJwl0ysA~pdD>-x_7njREz-wWBGsULEw)llj`TjY<$c}LAzP1 z11J&{q_SstkKedR~lYFflTg-WuOD`O4?` zwqLF&UmfR1Ha2RwPP#sdL+t?15T$sb_&>nOR32MFTW%L{C?myoGKnC?hHqHL+83}I zWYgYjYE5-Sa-!ZhR+PBnP!?#IRmX0J$aOaQmmF5ymgun{0jJh^{gZ>vtJziam+mQ5 zwn1;ed#_IT*HXnA;zQWFfTf;0#*zPR4Dmp#RGd*r-7JOHT^^;&-rMXqvcnDX>AqEI z7DKMde#Ssok||W)oIM^Mji2`}DsX-cW7Q&rd)ZJNP3{~4TFPh3?rPS@UnyqNNv3h; zNo$4LgZB6hSJFKV*-unQW5 zWCs|M%x#P$Dv2r@iiuu=5dWZSfwqt2XUP5*_)xb^hiJ3QEUK7^S53_PYz8u#ee|6w z#OQd6DGAR_<7oelMNcRk&ZXicWJ)^{E?s~J8P;f4EBzWjT9y~tz+huUG8a3N2$u4j 
zQuG?G_I$w2wA!D+<+xq>=xHWE=O%B@IQG{4$(FRPg?BUdXOcFNNy zHT&PkphvEHlvID7D6=}|F<|?Z*a-;IP(#9QyjVerM9Ryvx6uKj0#3X$L^-GtpeRo_ zK)$(qBoNoiYC`$m)>UED{ms(x8-5$62Pg7h(#1QMi)gRZWG|a?Xr}ovcvivfrA)|- z=(D-hB-y$H~zLy6(vmrD;~T@LNXk}MjZ2eUj5;e zFsJJY0{(kD#K(WhWLX?=;kgMdR~(aa8e{$hMTFYp%V!YP2(=BMDW$fF`bN+P+a7NQ zr&z;b(=9J~Nn;Rr6`XSF`U3@#iyHpAHa8yRzMl%a#a!a#_LhgfCBIJfVhi|GZF_Vzvtkk{lj)r<6 zm7pB3Jb|4F>;;cT!%8#sN&LL%lB5pp%{5XYD*J~U8clVP4P&+XM017nPq0A2fy)Cs zm~pI`2CW4v8-V+RoP|!id2hY?eTPr%?N<<|hnfd77(FCMM3*D9yeQa<6W#o>p&lRc z7n|Qia9&8@syiGYaabf7$S0x$p(Y_qvTmU!G#s1DS4b~@^en5V4MtcJoS#)7tR}Jzo``x^N<&}-DkuoHh`m7s^y5x zjv@1MEA)9t2S)R>gkvP}o z8UnfqZtd}b*V!@if>F4M1ga50YaSRR)l>d6(e?kDYz*cvBk5yb0kv ztJB!y0i&g}xDdaE8yoTedZknv2Jf?M;I)^q5Cgp;@vCztFLmmDqud8u9A2|6?Lv94 zK&OW`bvtGpaUQLWT3;p1cr~WOb+71)zY{5V8OE)shsZb32>jr`c1QA^8`!QZ7j>sG z4-Lj%WDk^`x}S;3zW&JYshCejB|B(G`Y5+Dx1Qpvrf2oMA1>!#C&E)!pRMl%SKAn2 zEh&!VYc|WmO_I|;l}2)cu)Z|sdD;SI-H_q{Wod`a7gr8~O{U%*^n4wD!KLwcllDr& z=`WUI&}Z#0(ERMN*tNhso69luTPxFc0|O(YvW4#{YTV;VjQZp)X+=pbMFiKTc~=n z-;B5BSPeUlwJj86Z%m$HG&HbbL|-yPqDDE3JO<8xjBn>1x>X9jn$+{hBJiS+={}wo zRI%fFA z_cHD^y?fZX+Nw=Q6=IpSu0Y-pofgWk9&@Q#rF3m%idCr<{~HNSJ-<9Hp;SRENC3n# z^s|dkyFw2oibZvcp+a~khL0sV-0tkUS9!dd=1FsMsBCQQRsAb?v9ra;hVe2JGSlvO zo9XM#IEMh*0^N+O73b}~m0s;^s@d-~DoWJ`;3R06B*EJIrJG4>Uwh0PaRTaQyerP6 zBcT%HDEB8{>?4WE@W0)l$?vb?_&Lg^2D8Q!5hID?m7`Vyb#{>D1W@EUrY{cR2hVRQ z*5p=&QRjt%tA>5V6cpE79$oc#@NaIE>lnu#|MyTio21gSJ@YdREczCb8|2ACiaMMJ zGo+BrZ3TRK!AkE>gG^D1dP+ah!!g#Ph%OdgiFA z@%8Gdi)LEy0Otq2nODz7k$vMQgg$c*({~wM)Gm>;KuWF%!|Gq9^^9AI{)aUScL^rb3+OZYuVHn zb$NxLvA5rldutJ%Hzog;ml$iQr6ehQckULgoorl=wP8H{#OM3t7_A|;JQ*7&$Edj~ zf4j8RuXjc&#X^QN{%temrescP1Bw8Y|In<36N0+Ev`TDs+zB>#k%9^aoN|zMYo+H2 z%-YjdQPO5y{@rRvYz>Wk51_Qb$n>x(dJ8<_8ZFoUD*FKU)1>zYXoz zw)#oEXiZ~C7GyIO8`Y^jchi~}Tf~mg!3wZ(N~P&=;<;C(VpGzN5}?*#y1{A8;HjN) ztkgo-`tj<#w}6+2Wv{vW^c5!vF}_Am2&6f;D`)_fn~46#nR9*>(ndM*@NC!(ai3(# zDOkbka8j};_07MP^|WX5k5lL|= zHRJ=;P>JlA+n@U|7;rRcIu$r{#PZR1Q4nMeF70*@2#Elti%A}U;^NcVnfx$_#QZA{ 
zrqts;ZRxraD22oE?1*n`(VdT^nwVR|Cs4KPylepC{Z2g4WDH^CVq%`(z)x8f`wUxDW8jBiA3RiGQ) zQ=N|~(LGkJ-(kJk#y9$o+kZvxWl&*ma_P20U9FzL)wf!s?CfCH#~Oc=&3MR%(&fOAI%UMgZgXYbsD zJ=Jcw0>8ga*2y$-q2)-nI@+;>Tp`*^yNf$KBo(60xl4F^Y4-!YwuXc$((n_Vl5&JJ zy1d(*d0id$c6fLSVue^o$UQUE#)nut^rkVthBow=^orCTQCrw(UwAPI)9&uj5gT72 z@Z~sl7mJ5M(kU_=V7neJyIIUM3-IF|Aws;esilf_h6UhPlsr=8M1Qt7pRoX)`!-s$ zD!QkxM2Fp>pKtP#aSBL$b3s7x)_hmHfL!t0W-;*svJqXQ=|Z-B_hk0~Km*w7G@!M( z8_mC=wt%MDp(7%pS?!Ym`HWyBz-fc^Nvw)cB`Vk5lGeUDg9Q#cr(RnZ?u*qF!M+4Ini#8ndZsx! zKjYX=2l8nH%VAfUcNp9Myx+N%33^&RiOrv7w+J+qZh{q^9aVtU9x7$t9j_`G>|By}Z2ehuHX`cR?FKh9Pt;r{j`Y=EgquW_g2zm*@s2f{PM zbm?MA9||F8m=Y?RD6Hu%(qqa>V z9rPVHk$rr5%Rfx)0u9la2tEJno86fR_Nu?#5ipYG(qXZe-byFB4R9Y*&HD=Oe4vAm zAE!+0;9{wKb=$bI3lPo4MJ3?G?wd?A2Kd01Di_Lypmd*uClL6MLd2AyN!U-7g3qNm zu!|Cwk(5Z1NKSba;&XP286Uf=ICPvJ59d{wfzS^QxsRu3OU5hM5*u=Vo(IAYqM9&u zBo~6?eL$W{P}Y+Ai8_MoD9!z*tL<-#;{5+WbVB%EM%Hg1&RQHyV-O*KBlC#{ttQRN zOhDF@MNkR02#Yb4|JNsmcrq*GB(IEkzFbrwm-knmMEnVn=bZ(bX9SkF^7mdwR_W7O z{$ly333y$he0Oqz>Dguwrnwcp7=LT*+w9k;Ur!vy+0rjM_5as@_b=y{A&ROf literal 58510 zcmeFYWmjC`vNhVcySqEV-5r9v6Wm>b1b2rJf;$QB?ry=|9TGHHaDNxcK4+g_a6jEK zR@2a{mpwIWR@JN`Qdv3t=`0?SPR zkz$xfNPw*PLFJR0QIa5S77(U|Tt6>p=^cpWy_SUxsJaQ%J%Nf)3xY)iv8Y6Z(t#ko zK}J6)C_F(SX&_9gKUxA843((+^uS7`)e5vw@=6Bk!M<~b(b8ffrk!|?!+^Gpc7bB8jJ%^*-3@@}hl>`K0XaPkXWh{@Vsy!2BO!s`>! 
zEP4NXlNN1y%v}|9=QxSyN9+t5DrrG2P}p$*-8YMNt8B494t;+=p9*)3?zCqCFyVk zrV6=S0;deCYLq&uh78dkK^Jh|aDA!P1pXf&wxFl5c4^kHfwd}vbBGP%EydjUAyWAW zQ)X_g>G9aP8B;Fx_<}K9dHYjkRwyg+LgGU#-3PcZ?EQ8uOoM%5H9U-PiKe49B#>ApB+Va|pOESfzgp?d;D{$O!5FskPG~|iJ za`n`$X!rfNCTy(X+A@q33+V9}%&6WG;{Du|=#k=VG%cUO-`9LspFy9InsHF2IAkoz z;E=(mNE}`^}*9lKs(x&oU8l{(h&nL#sMsBa8P7^%uu4 zX!BGyQH^ius_Vsh>S&ztx?&Z1jjB~D;l&snAJciqgR$Ss6;$LW&Ei|(SlwDz9k{ik zttSyHrc7zgj2=oKq#Qt8c_1Q%VFeFGSkmHU;KJZq;(6d!rOFrL%|_!5sk3mi9;fc7 zp`r6`x5jX{eUKEv6tB*ck<1pUEbL<- zXFqk#__B{XeOu}?QCqZNX-OWhIJ+#nR-NkQR|{d7-BjnhOgBZiecGawOTVZM%rm+j zI)XwD`4(1lecRIHlw|EPnKG3!>EjNr%9En3!VbwcoyS0A(IHtHeHv-Y_z9@2eYIt^ z^&q@3l+X8~THVKa|hoaNe?9LAX+47D>8(tmz4}`wV&+5-fJGBy@B zHk-e%{i$21bK2PM5UR_oQ=qM(YfvXukySyp&{ok_gjUp|n5bBmy!ly646WTewCU=99~ z@Yz|cluRM9(elW0&%%AQ+&r}QWxyf2iJ3SFX4tmwb2*gGJNQPi!UJ_(+C_SpT1#^+ zi>~p=5#HpoY=-fZvAU7f&)k`3Ij<+^z3AIt8VkbYwB8YE?{$>h@YV`Ad#%FnVnH#4 zX+oC^G)Fbk+s`YNooJ<0`gKr$Qm_sD&@&R$(*S0BjGzJkE7bRRZSllFNt;<`v%&Zw zEQ>%0D>AAQa}_5A%YTV>&GQ#QxZ_Ay+S=FplCu65vq_5?i^IK*ciDQ#$)zcKDaZ~; z%PaLro0|0}*Ef=@%qiovt8KxJ;w|601e)8;i-sr0`GwWLt6!-qc)d15_n75cWe|-N~cPm^OS$cSv{Ah1bp=j@XG6XRL@eD(O z+_=~>H%~MpsID5nz;G;$JVes@l6B_s4v7m%BQ|qzhr&t1>*wJu+~zGY65on@jCc7q z%q)pJktGqcjad4hbg2xr^hZ4ty;h|$q3MOAjZaU~t0X9y90EFCvX|<^)+>iWvx$~} zCS$UavV8rR?$?Y~^BcYQO(!;OP#n)%QQfv@BwwTV`P=y?^#3%w{i$93g`w4~m0rbX zXn*8(B=C|rt2ES>*_K|}qHo)B`l+MA+v4_+Ae(z){i?(30{eAgKATr?z2owe2|7bY9Az zl*BH3pMvM3?qj^F)xq9D;?7}DcGeG9nvW+v9%~*%XWuqalz#e<`qREz-Pc^JO%**R z;w2`&LPDfoKAEz=TLtn>Qd1dK1rX>H6$lg%3IeIkcFp_=o~Qlx!gr2TL-qcVcMTrFhO zii{1IM-U{-Z9E<#jQD3r5f>IvwxduKm;-3Teq*0|^3oemLqil5^1mAoL~?onDQwXH zY`wgnwZ;F>7q&@d%E|t_JID!@a^e5%7Uh9OxBWl6NeLk%Iseb;QIUiC@&EVaz%MYO zCP@FiI%-HTX-(MwTpQTkEBgOm{=duj|M}vd&q4mZIwLn?Up>eQ8WkKBG-dYS&K!Un zQ2bJ*rZLacfdAKg8f_k`8cDi=Z?=ml*fe92hZSK6zy`g5`}=ZpOcw_0e)+qrbX7Xx zAE5fbI56QB;)(DF*vp;oe*&v7DP4L0PVNm%67#={{sS|UVJ>V$4CwRb>*LtiSlCdp zw+%WN_&In>o&dXZ!2|pRw$`>b*XKhfP(^9!U~v-M8^nt51hM`DKE_YtJuF}#G&ApM 
zqt!-xD_dJ}IXSrve+K;6{EiNyHusZSKU-Ll=+aU&8|T09r2ph7`5$CPugL#IwZSJK zn4X?aND@_}tm4Bt&Y z_j{=Z;^gE67BbnQ`JxVZQpbE1(rY6y|7esKshNkQTTY<|=t$zS&=wdlj^ znZfPE?!zc84J*N+chQ$LMz_iK`SZTMq2ZlxKYA=VCcF+*27!h~tSU1qWR3i>9;o{l zdS|G2$?rtcQk=hDoNu$F*}GU6(8>mp=AIgoUXMTs7`ne!3cQv#(B zFj=zsA>FU&L}hW*c_bwT5mZPOEYXIV?W@OUD~As+&z4$RDfiZ2KjrNVs>60qj9n7( zyXQ7EeDwN8ZBX5EF-Q=*jhG7|f^ZEhiaIJndzo8PgVtl@q+`fP9G{`ocU-tf6LA#w z=J^pvxy$(HOf=J0bA-w$35E)iGud8Kb8{haame1#F0Slv&**cRu{q{r&ELmD)(WA3 z+VAp#e|2Rg`JF3^I1eFLG^dJ(Y#qShWXRwg4Fgr42WUIy2kYW zwQ|L*Fbb=U<{HV^d!`<`I!HBlwji5 znv+!VJp;_T6NYNai(Nx4)gi)YE0Oe0B&=8xSTJxg*K$^OeePlXR8m;b-?*Ue!)Uu7 z#*Q6rY-RPsv$E9YYioeE;?9NVf|?Cl5b4Ot%KN=uNYS7F&P%`Sl$^WQ8J(V^?O;-a z3&jg5%DCE)I1&yQM2q$Jd{beGT$vJ`{d)(;UyO&}B9S(x8IwF}h}z${kzZRgub-;g z!T|V%Y1$CrWDDiVUqnpfNF%5Qo3+3EeTI;~ID{Vn+v~;T(V`B!FOa0I$?_D4J3V#4 zcLgpc=ll}H$19+f6C;OL=x`bzp#!!KP~HOr z>%=aw|T5;BF|4x2Nq?28hI+9p)CsoXuZ>Y#W^vU zO%*OE73D8a(fjo21f_eo@@qpCm#)AWdFR1?w_wO2W3_Cu4el+;dv7tVc!zuvOHCE+&y=G%ds1hZA zmn)y`&9YpX^N}Br=fM?tz;fZZurD_yF_6PA4!*_fhNge^W5W_f?N>F@XCxpXKm*A3 zdZ5@Rl{6yspJ{q&Lt9uPIp?F9QbUZ;?Xo?0;5fLs^@b#SB*@W1#K;6L`Y`}X1}L!K z1$9J#^vxSGLI1o#x-N@A5peueNCJwjgVWQ&SniF8g#}I9w6!4M==#RSoel3Z$v2kN z>mbbo*6J6T)&4)^YQtu~WmNe)EbeK58J9v!OI5;*#irt>H`4AODh=AIZ5Nq1xVZ8Q z3ZTZv$ARFcW6F*mNs1j!21KX2y4g>R9$`hGkn0%+fIekg-K9$I;ex8g-lCHPlq1jK zDXB3{8bt?Q*kK6u3hu^xfO-}vnl1|v(q%C6u&H59@T_YuSKhyUA3@Yy8F1$$7{K~e zk{a_7rKMz`+g`VJkHFv1PF28P=r({$DPEO}fd0CbYwp+^X^*Tpaaj^4wy>z!T!AR_ zNg@emUO6FPyWMaIbno@`NrcfrX!IWBu)R_-wlr_Z+Q$5gtX!o1{OY@Ci4p`@hSu|$ zl2?FNi;rw+l29-p6HxwP`ygx7q+*= z*Q!s4V|{%c%ftBXpo5hb{m(7uL4ke*0T8rV%Hq$p%`djot5sjc6b2&fzv=*~wMyU{ z#RGRp3cGK>U*h9AK`IP-mtm0sv4MOo=lqNVU7h&gc=!**;M|!%wf^SgUZQ)w1t?eI zw6SpZ12B(V*M{u%wMEoNT?L^dxxmTjVg8*oqE`YOu%&*l5bQTM>Uu1a->Xyp6;lET z8H87j^cEjfEZ%TruFsgk9{-oPgGm4MHoGTpQA;}}o#_Ql)mo<$-YkTvYs5~^X;;63 z10OW;5+|1x|mG*+Zb^7Tmp*~hy+N2mEU{o36nUs+9s zIO?Chm%{2Gp(DlL-~}l<)lipfYdIeYj@So@8+HgfXUx>NF{2inHQ5kC{B~9Jah!jg zzNDk%Z>aC(`rn6Fkp<`&9g5IOOb@H)1^vblG{IX@X7dpL*9i2%s_6*Q#ekcTMzA3W 
ziKh(}u;5fus{|OQa`8-tG|~q>Q)M!+@)#DEGz-%GHTTz7$@>48yXFmsZ|2s4cipz3 zC2?E* z+}2lmH5Df<;QRxK=>GsDqMg)ry8z`61T6~2N-5k;^g)+9h`;zX6YSqJ2>)AD)O(nM zyfIV$u~4wwwzjtXU|Aq1QWm1F*TDFVGvz=2knJFi=N}Wbn)u}{6<2w|(faG{z%6;1 z=<~~MX{kAP;EPAOPX6eH$A7uVK0 zbYQZCjiA~9h}6j&!#AO$@nsJPIF!p1!0y9^@`kM8AW-B#Mb=fUJILRoc{30y8xE6S z7m0{h#+0T;wXifv7?{8ZV-g0{NM9gxR~dsuIRrCd56FGl6tht$X?hX_9fTI1O0mBJ zNLIDQ=nwijqo2A*3&2(-?os>AMI=@^#raQ4YQV##>fs0dP<5AQxsJO21oY!SQcHhV zhtSfPZ=}*r)yp*>VO;#_F!yTD-f@b`kyMW%V#&Kmc7%LyPxa}(xwQ11hR`ctE?jhh z+3;iDTdI`UUyS676F}!J5$D1m zPy~dG8Jsj1OoyFfd8{`5y&MeHk2$orNHoc zwC?~C!R?-aKkbUbT9qlU3ExBSbpPHBw+{af!fXPedXu7OcgOiTnc#d%`m`^-L2GLv zBjY^L(r=>u;)vMV<$w?}oAV`706}!>b+m4a7n^Z3z&1FRXZF2C6PVeOfY<@&^(;D2cu&|i@Xt>6! zadB979q-Wu)hq_nDy>a7DRqsL@1trT7DEhc2Dy8cLVu+MZzBh5KsYo3-8BSH%Q5k25JXwZ>33HsD*@*j={+O1sG! zj@_HW&%TsQSs}Des9x9tlR^8qu&pwEHHf%jV5Sc-T9sG0@n94(%>$BJ9h7->0f}PFD zi&yf#bm4E)zi{md9QkL zH9!Enj}2#`2ZDo5kgspoeB21m%81Q}1npg?8jE8MPMux}sbbH|bx}M0{UCA#-hmh& z-UV(mzm*QiHb7b2y9H|v%ecN2qBKQmpO zKl$)7DYT{f!_HhG4A{%PM4B>fh!IP~-rioOsL_T>Coyj7>n~XR=d=Kku0#UOdGq74 z+AhE8J>LB#Ci6#$D4#BS1UL?Wl7TFC&}VFH*ks>SN;y`7JVNSQgao;c&L_TAFuS4_ zt4CbqBU{&FAMkP2csNs3Y4V5xlR-Yg@CZ|4f|FJ!48sso^9Xq}CV~Cb1BqXjrt06d zzioz`iTolQHqdyMHc_xpES z&bwCY4ArP_fZ!j87~&BZzePdviijy0xbuZ<6x~x4G+GP76|eR(d$hdRQR43|2uI8d zGtYgM-$pv{h;Dw}<9vJZ`0SvQ!%P_auvsHFXODe9=5Iz8O2$LzGgb4t{&g|iSA048 z84~+TZc^ff#kOsBqg`>*_%E~ZhtQ^Uu3WOZROdw`hJAb6CPvIBe88`fvztTe;XSdy zzWg9dligi9Od~bp3v_(v@%cdA)!4TWXOJ*k6i~XXtgS_9F~#KMM!$3}ijW2<(|%3b zA&wtHiel0NvBBhfdKbnc1Stj{Eq51YN)U2T`_{ zH6MM@{XrfdIEn%IBbdBG(YUB&2$0!@JDldHxs%{$rE7r>4m)CohcSH|z`k^gVmH{~ z2j-yJVB-_iex8pHq|fvlq;@a}w%m{}l3$UVs zf&tVppppfY!a$&g-iOCr%}uJ3R4&jy6ItCpF@hT#F}^fM+=)3q_@1kTW8K@rb?Vo( znTjI!E_rX>X?)zu(NWsln;#G~0WsjMFielWiQqU%-K~6#BjT6kq8gx@Mx!)D^dE2vuhj5?Y*c-h(%3eV+mTq@V$CS zDzAren>hkCKEeYL0RfZJNjt{t8!y)e(SpLNfu7a`-Ha9Z6z$*)u}}Rkg!B9M^G||K zZ4G1iWNP3^;yj+^*B%%ByV5;zITGo{-K?R}O0KMYtg&T_57jl(@g5sp5fIYMsmxnS z?1&hu8V&Qtres{Rg{EQLN@0M(4hV4jLYP2z0{PT)GK7?Y!L-#|I^(xAIjXf`_*xUr 
z+LEF`{>KSoNs`}9Mq)rCQP{HDM5QiD1Q>5>%LDJy#LWbM_B|FxB&E7WD5cYRt7L=# z!uvVMhS5H_%r+K>r}apcrvxI~=)|$m>v1sxMvq_V>FHAo3tiV&jjYj8w|vdq%P-xi_iY z4#)UwcT%I*&BtLHyrJp9%JI0^pmQEO^f$r?!{tmObZx?PO(MkW1u_DH`NogZVe=oK zb}Kwc(_|?sN|iN4G*SvxnBx_8vP^?LU*}N8xIY9=u6J$N5Vi#yrz-k_xn^o(kjQOy zZ1A+k8}`54GH=ZX@1yZ^gF}CUPX5tD<*aOm%#;vJd8EUcsi~M8ACOWN=p2eZA+S{2 z>Aq2(o>xQnU6mdeT3hXGaPXtwn%g-1nv1<)*iaEFzi?C?&LAtFQ0$=@{Fm{evmdvy?FBw%TYzCWg+F1C;4VVDt{g|oD zzPRN1bfGg0v5KHwG_3D=vlO9TqpY^j)9L5Rra2%MOR*1h63knd$A^@$L2GRBc?l&! zP;qL{b#*qlVQ?&nvv6vWPGQ#-F~_{}{~&bwU1@yJxOGC@6DF$B6_>&{$py&mSVDr{ zV}Q&O0K(ft8*r&%(<3ZTmu#OZPsgwfLpzU3*Qv8+lp_z%;9l4jUtF{`K^{FwQ*3v* zVeF$Lq!X8qVSaQTYD`C%+}HZ%c!QkE?G1z`&xgYzAPm{Hc(1tJLxKBV5&59^=-V#C z=gsTqUV`VIYv-TVofzOLbwWtbb>B}2fCo+P4@4!|IvtvkXeWb}F>N#zIvOD|Jh5p* zf2KqxSa|5JK`bnsJs%-=J-?#@((193O}G=>kuUkBHu}Q!ODG`VJJZ}O@R5rnnSU#q zb_;68bKOIXq4CV`Nz59VIv8&bCe);IGnSbq!d74dy~GHS9J(>FeBueD5e*nQ=Q!hra#h+g}OpTxfaZyt+ilmJ1tFnSnq4lrf0k* z4FKsm`dwy7sfjbxAYxnF=qxNUw?brj7n>1l2MwC{xX3Eb)e^gPyIA_X(Vn9tGeMvQ z;4`sOXn@4pkD;W!Z<33s7|3W&x(FVvQVv8uZ|^qKIzj|8Ttxh1H$dl3krEtdk*_N) zwZ9x+wqZJPYlb`dJia5oe4q6*>Womr5pGaJGMe+NPF;KAFe+CF_KP**AvGVK`sM-B;2sT$^AYWp&QW4lhP|}Lj#yRRbehI)143T z9OP4RS%v6x6yID`v7~_kh!c#YotWE9C#X0FR6s-|*cDLia?J-2)m**iTtljAG1)U_ z!I<+_`Ncy_He3yO*JDDN3dxTNdSl$eKC-@EZRMHLDE`KB$Wb<#f?0Md&WDRNAb;4h zkj+^lk8|Zt2}>I?4V`RhueAZv7N1NiWIS6aHIe$*YK?YvJcPlp+)zFVu__ zjKh>qy)_ht)nHXT3CqcO{^)z|Dfro3huMEbsTeF^-gbdB?Vb_uxN zh<{N6omb$d#?4PE-0UQpwyOWFdW2(luxHV4!fH!57)K9_ZjWU1L%;bvR|(}u z4wvDv$LX3*!X-=Hg5gU$bum?k}zyJ8H*a?t0tl56KqJPEH`T3bG#tR3Vhf#-9 zXRZCCVCT$3+h;1cFSG%>UM%Iwf=1Y2T#hRdRsO#`(2h^UNw-f%S7APn$uQsUZv*RqgOY7Zb`a=74M)DHw|sHlpuIh5b4x>HByRrztcXKF^BqRtaRDCnZzM&QPBmx{tJ7Y=~ZImp5^g; z1CO%>rQFppYyn}sgbBD|MZL(%CoRLxHC(Wr-FV>-(}6%$E<%Yuj0Ic&MO*DcG?ro@ zT_4Pg%DiD*vha#(NZ*bkFAVO;w$7Qz&c;TJ1OH=rUcJI`y_cP`b&*gK;njy$5Tu62 z31b4PN?cE8;>~3T$blp2`^vgMnmP~V!cu*LUP@6*by_`qu@^jHa$+tbmw`cimG7>t zGA52b6J8K)fax-?oQ;nV^J(iYHX+{?G4Lr<3gUZUumGGVPH*ZPHI5#a0fNRkmRoI7 
zlG^h(T)dp1e3&JHp`##Z_Nw?%9FjFqyc~;v$sCOYUi3(J0b@KFhkfzD#jN-zh7zLy z$;ZdLburHICS=pG@dOQa4zAEmiOC+SKcA__+jYjiW|;SP3HSt7+9Ga^ZQ)WEeOY?V zcD57XAwJEL@iiSu@R<(m;v=>ri!FP#)WR#TpCjeJN7A;}%zlnS)}RYWka)aFO6W9_3zj0>i1 zCR>RIm=#x!5F$Dtq0zC!bK!7(E4?RK_bv6eYgDf5BsP5bgY3EH;JF45tV=#^(0+^8 zK=OD7m&r-G29LiVC?@+?PjyU!f|}L)iS6ufGVw@O8~l z>7g5ulrDEa*L4=ZOQp)1%LX`$*23F;`O$NN&XS$F5YkG;bF}tFXI8%iLt*nf0g?T9 z>3jJCr>!NZtNxvHZ;3t(8FU_lrwp{0KX@B{O%Z_hc+=$%1}7*Uwad!M8QE29ttR9; z(736OYmrwt^Mn!w3(0rY-CKgXrtd%D> zGeG;0_7htuTRZ|M4p%?^?kjeoBWW9=&1{&)-4jk%ghrX>Aq_9xXLC5QY;$>!buWew ztB+i&!;~}DAL$g=m>i&iTJ~-UQy>cTehj&?1?owXt=!&ew{@^Q_VQOyUSFWvuQy2NIJT0#PVVdzW}yg9BxsV& zd)PwF*}Sm89B&ZuC%w>&Gq_9}p(BA9K2JTD3K0lY`E}DJBr7HOSYFTMuG^ve+b%GE zHQHl7Gvi~ScKZ{hNI%9!Tg;iO)oe{cPNE5&iNVs?KK2=PBW_=BxrjhB{GszCQaVQ@L7b^AME)etX z>~Fok&ju_G*qSqt!@QpxGg=0h)dX!NdT{H8M&F>vA@!ATrYwFYN07nylZ^Hz|?T*wb$2u_3m*5nu~0c_>Q;U(OKnwN#JwL@$2!n}dA8U8Zo#aFFM- zld#lK(bjE*uF*SdX`NAW%e#3esITQ_&8H&)_lJ$>#O6e!0@ayYP#|rM{oc^KpI-<+ zZhU@+`M}^-iAQkxE~8S^pbra>e2G&)I#YvjT%i zMWpP8HDDE%r=R)XIyybwy?@%VoHQ}pg7nM|?XE$+OY;{P5}6W=Zzdm_xvaV!A47xk zO*E~W6-`0Y8xI{Pm;hK_nZ<~l5cwO>($Dmo3moD3Sg}k9o2bohyAIbYs_+u!mpBjs{V#=QT-@0_hYi>GI9uNEqFA zBr<)-$^=7)2;RT2Ba06qSXKFDggi5utyV;xi>$P`5ayq`+$ig?60< z*2|V{S2z~K(bfg+d2>?Q9~zykicgOI(n8t(%WT+vZMIz z&yozaHk8#koY-)1ly{@>b%r12sp*g-0Hyi7C37x(jY+L?Y%{YX}?Ru1R} zdK!;}}b#J*Q5KvXG8c3;VK=7J>6l;%>P+aOTx^rLZ)vR49VQ*&%1 zus^*ZcqBL3i=S&esk932iA3&7H?|jLs$!_|B&*E)aPff zJFg<9ySf~6i}{s}(41hxe=C*8G#~j;*X>I$=hG+9mtoAALpRY-<-R@&Tq1(WdmT{4 z*paZnh5t@x{4+B6M+Qz|w8(IiuVt`r)rG zc;CxYeVoM4wTj>FSBJl_X4b46;lrcT7QHN=_nCfJ;O;W^al9;yy)!vJ>(juQlN4TH zDcoZZGDH(Y0=2tWZ+bG)9W?T?G;zv|K}G>3x~3tl7!5FQ_(-l#_f|D_oD1lmb~6*l zn_5}Ht~NQ+)VSp-T00B^5+%I_(*&!qo)Ozff!)v(pklKw9MUtN*fk!0WTUP>LH2Pp zCpcK8wZb!30)8L#E{FbGi)bSvjFtjQ42^{IxZO_SqlD}?XRih>C}| zS6jcl$P*7E(KzOYX`!ibe~g3p;b&3J_K}ff6z7iw4O5G^wL72@Mvy9qLbBw#ww-ZR!9brWD$6NT@YRgMs0&I}a3 zB7YoDqF{BZ9j1l`s{ls~h4e-^=^q~7JpsiW-jr+hv_BZ2=F6Lmi?vJUygXwleJ0+j 
zNf$tSH>GEd)XIKD3z_>&J(W755io$j8Ib(_O>1e7G7@qw8)yg9bpSf*X^*B>b|XAC zE2y^Gq!rjSGn9p@2fT^B9Vouw#7zWWXw@^d*$p*gKk9e9ZangMA3zoB@{Q+vdcErG z*6a+pn)WrOv@Ky%T5x3&5fL@QiT{#i!R-ec5jEbor(-EA%AJEAb_Y->Kt3sS&< zWr9pd|BL{j@BzcTeHcmt3HPc?6UUEivta8C!)Lc@jiz~tnv?B7)?{u9E3!&1i$b@= z>`U;LRX58&RO#PpQ!en>v;DGV%1o6t)~@Jl}%L%{H9Eyo-Oy#qxh#?~)#+ zAr7L2-MZc!PCY*gwC^F=3N@5;YToy4@AMGPhlw{bF{!OaO56+|&*0u^)Js3F2|t@W zt_x{g4^5=;cHaBJV3c;4FW`=iDLa!6Tk98#vOdIQ(YdA?G_H>$Pu-_kW8p%Nr19?Q zsp73WUxRF-IIPW{V+!1fui12@!u9MQ&A#)vTPW?Gj%6fUIG z5?1bWcp1M_l*MfHNJGrgpJpunaO?W>*c@l{RI;=2fP!T|Y>sJ@w(SS3(b~+u8P0pZbtoqc#j?VON_C-XzX~QpGbtHa zi4Cah_NJFr4y|(=B1k&-96R@&-S(W__8x!S?ekc-cxf1fPqi2XT6`;=LXMgWVKBt7 z+unlSJq6);wemgqTedKh1Uj4olkzmwb5*LJwbuqa>gq563jv#ffkEgQbb2}}d?>IH z;#31e^4m2-#0+{hgAT6So&pX+H|_ddhU0r8b^4J~-*jB3XHe=`%u8`?l`W9@YKVXa z-nhv)a+vYTNIwc4&IwDBFz#@^rSOt|%$H;T=PJ*tGs@5CDq30y$>z#!mBd<{<&cw?K(QRP1Ela_oBN@Q z$AOE-VL&G%D*vMj(%0j{`Am)7%TMRmRZk8dx|)C_j+V2G)z=vIupT~4e8ZK2H3^t; zFks29Yk%NAa(lm0#TeWVOkV>X0bp)eTdglTzpae`n4$DMDgi2@$LX1wASa&R5pB;v z)6D{ye>YCulPnC?y27o4X{b;dfYaY>hft-Fme3_srch!V~q}TSW~6=nMxBeG^HhJO?+dwv_9O;-f4qB#i)ss?ZS^UUt$U5+!du)xhTNuDPhBV6do=N&3Sfw^FfGJMh~a7fVr%1Eacvo|40&;f9q z=5=_?6hsxwo=bt#yD5dG$3gx*R(!hId#nk8yOpz72gMr4iF@Y(zD)cBuqtq6&bmXN z!FgbYg`Bm*T-aHFos+ZV1q&(?0V8?<6*+v|qiH|(S^l!OeGgLJ)`nf)+z7 z%qEZdX?;UMud+Lgp$c-yHC2|axT36YWeb|3+r16z&R16|c8_@{Z1h(U?O5ss_6|S0 zfI!e@=MDC0=OuQtKAUIuUj0qp0!k>jH!BTCw{sb+8o$ZAgJ@}1nA!9tm}o!>?5v#@GYI{ z{g+hPyvIAqa=pVqy*;vM!Y_1Kug_@|dn9cm6ZL#y#xcXUXPw1KpE^A(@N@isqg30N z3mzTWY+ulRmYC-86P%5Iy=;387#wUUu7Ky&9G85SPV#_d=lRDv z?aKr7f{GQvCS5RrEIe+wjcxlQZ>wa z%`A6yzneDxI}iefu+4g0{QKbob3GbpT5ON7j|I%2-e&qWLt|kdaCM+bwXm@8#rFju z16Oy2rqa@JbB-aCG_LhJ%gNf!aR;EDK+u!>64xLmC<4d!>AUvQQfOtV)G9BK)1RNiUo24vcb$PKG0t82 zDQ~@AZ|oskXV)^xUijs17ibwPo^fB!;=_0?FuM2X!egxb`s5huqS2Bx&4P#_id@q! 
zlKhL$HqO^RpK&+U#Q6OO9haD%m78&4$@7N{r}4d2r(JuGo%G|T?)^)0)bqSXu7I~{ z1?W|gZcU=wERl!ci(CS-4av})8+wU4pDo3*#{;9#bxV~c9A^}{@o0Ejp3>7rzp1xJ zk>=B=zv9cX3oIvS>SBL*Y4Szyh@IqK_D^Y{cNz*}ptTuhh2bLxB1*j3^Ak`jku@KG zR@HcRWpp4qi!>OF3P2zn#icPHMh_4Bt;*RJOuy?s2eg&}Vi*`Ca0sQ1LWO+CA7!qs zE!Pc5>n~S&{t*Z`Og$%>G0y53=eF6>QQ+?}Ek?F+jYjuk3!XCNLTkhsILe?wL0Sf} z>h!U0V5Ub6!OvmJ6f(`)VtLdNY%Dm);eE|VrDZPmKfS?x)HX>u%Su-YOKmLkGN?2( zb|20fN=xw}&uJ}cUEX|uh}gUQyGg9%memZzXO|^d=&_#v$JJj)RrS4JpfG)C5b2bZ zPU&utmhJ{=kVYB-X#wdJMY=l=B^@H&aTG+l8}8!s{k`uP_x`109Ea!Zz1Du#eCC|b z+|MYMasU&Y{f~?QSe~#WCzqGA9VoHk=L=mt5P=)6xRg{!oA-@^L`kW=mi7#`wzeVM zG@Ore9;{!)|7=oTNeG9>a{iD%RRzk{IH}4H`FC&oknbA`$ zTRh&sj(vJ;$;G%}W3O_$niv(bh5tG`JLg(UmAV`q%XTDvYkzA&6{+Lw4ur2F&pZxs1OSqN`u#$ex8!_#TYp`N&*lVF!p_5 z7_=Gd1%|>Tlv%>#dZR6iSzeB0E&0QXGmmfX@HcVX{zJ{3B zx@nfkS%6Or1mj6RxzW|=m)@sM`cIX{k6av9&Q(#-5QzANA~-Y|*-4V@$=l2H%DUkXatK?rCMz2S&v4(rRbCaq4beZK5_JJ$Tr>fn03Y3^L6xy~VI~Om>E)y$+b5LrMK|(pduO8*eSjxa zmfUm%e!8QlL9xR|G35M(17)-T9z1TBzj=QDm;iuGH7I9FgRAk4zl4gk{n>jL^0uWR zQFF_jl~CH3B)zfLizr_yXu2o^7%rtUJFYF`)2* zQh`$R#rk84ztJD2Sph|SMvv~{Gxxrb!wS-{d#R;jGSEH9BVU*zX`=uP`N|C7^i=(| zcEG*3?YtpRex8i$Ly0gEwk_BzlByp{n*g9XcZSs3j{DVJ+9R)=$Cv&;cy3c+d#2uC zkN*9JA{->Oa`3JW7QAJ8y8jZIe6G)(!E2zS@{lz4a~^;7FG*K)Fs9<~7ii=%CL`tM z3mMW`_qpYk6p=A!wZBS~K8|O;%i9m_%QGgF2HbXOLD@ zR5Y#yG@NO;XDEXaJH3|c8G%WD=Hzp;A^Y7>hSUh9>(#TaMO;U({YCAhH(lUsr$sg-;)1O5~vi)U%}<<*JeVx7Nq^X*Yfw9XT>Mv-7LOKp6eV|r$^Cn zYu5Mgg7q}xfh#yJjFT{%?K?LkPu^pW+U7a`Y6jDxr1^k5M_+hM8kdMrbWYGoGxaoF z*ZCzK$W@6c5qCET16KF#4`*{c6OY#!d1MQ`_xFf?3$M+k#=2o?Mdcfh1L@#KnL%p&ck=6otTb>@!V*29zuD%FW6npY(U8Bp2v1u27tzV)<394r7$f4ER zZ9Q`|$bWB(iG{TgcscV+KClBPhgAm2jGL&Dwk~2coMXREK8{>GC`sP&0koo%l!3IuN6r=w?4dblf+t4<6S1VTA3+7x3TS zFn!Zzt2Qa6mEbUH{{RJ(7OrRWXfTJlh=uZL_?XAJuB$_KPG?Qi>XqG;esCdZd`3H)5=L$bu zJ-m%q@~-{d*<*E=OnKNR-dlta@!+Q5-L4yZ%?Nr-LrcGzfNiocDMmzDb*R#rjXT zvj4{h5`)SDAB^^@37>8pH%g}5+lWU>W!WxF9*W_(9Do(MA7{x{h%ttp=RhqyKn#T0 z7WL=vUe8L$`x4I~ISBN zZ+f9Md`&EQU87DWT3BlD9wig|X=SRM6d482A=RAQY1Mx}RDAE9VzbW4K 
zb#Oz+@PWnE#ud|+Bi7P`Cdz}%tdh%v?s$lQ#?Khtcm2*}40-2^Up)NI*&_O?EYCuO=`A?-+LK$)& z>B}etcjW1eEVg}YLK?>TytcU5+9KwEP}EY~r+e461q#c_Bt+Wheb9GD`ZyzNmhh9D z$NPkIWo1M-q(nt5EDNQUSGkl&Bn|tv6Ic@X4Xmi~*4@D&Yui8OC~xr|pwNc|RM9)T z>2f_}ptWLTO2ATmn~fKPjPW3fX4F<_ye-JMF+;9HgtOg@>ZxsV{48T;ot^<91&6_))u3OyzE|Gix zu9bev%O1>UJM*E}yiD-G()tBLKRhwIN=aPA$f%KzZpj?ZiSSUCdVH0qq;N&qRETBF zO+p{(Ky7}@5o==C;gA2iHNQ!zv7oDnN|MR-s`6Xx!O+*LeO22ZH?DH^S*$GerVU}`WZLR?!?2&W#C(p zZ6A@H57#h!_YGHp*vd-w)3n85Muk1rA2zGMbR{Dv__>M3-K7nf*NTB4;J~S;>du*L zJhdC#CA9w2cRopf_dXEH8{_87Z<6?NCUTFdD0SvfmrvePbU_BHp4?UsNl=*%q+Py^ z|5b_wdp1{6LYj}G^*)6oO*HcdAcB%q_aAssyPNaCV9(QYbK4vXKjoiM1zyD-FKdSJ z%d?k>GxmOeRp|vd%5XZn?gob0(T9!wJRelkhx@8t=sQF!E|^-c)NesBAQeBduL0w< z;JFuQ{VSfT{9Sp|^dfjMd%qCh)itZ$nvaMwfvm#xA*W!g@9}DU4R#Ypw(iyjc#8Ya zZ1CkJNX)(7&_`*^-J+sn1i(5)K?`1JQ03R8d(uM7V);(=vwVd+xL><1A6WcXg_h+C zag~LIg)54xL^W*Oe_k*=tn1MxC75miR~*6cG{U3h@h-r2OAF>&Bsn`=QKIe zn+6`TJfhHkg=I+a2>Oxau?)!7uhO@JQYDbfPl7n1WBlX+Ob|IibA6q|@UuD-%dgyt5w2axlH~#{dOJQpyJSA+8P|9f&Gh zgixyn5Ub{}nl?j$it2+MD|@#q9Kgp?7V&5LCB^3yonJ4W{T$9yKFho&B=-Z7$>*Q(xlY_h9k${AkC-}sh;nyUI_LjThq&5YqG?Z$yPb2whmg&R>q=L zB(G+zWMwLl#P5Rv+m)Zgxir}1hK&PCGXT~qwXgrSWOqcT*_LKxl8D(U7tuH;czUh4!#xbf^c!n`9zLX9JVx=a z`B6kMjcKLBGWxELVxvqmELi1k*91}HEp3tKTCVQ4#lk*&cdVu|7PC2U?o4g>KdmU_x9ef;TRnAcK~d0$i#3`=1+$r9 zzpDTf;E)Dh8rL=gg(Rfh?K9R2lDeJFDcj?TiM!=1!zncXn9#mGiOIeLZYKQdJuck=#0z!Jxb_tx7eyWY@w z1WDFewuh>5H4JAph#<1yy{nxSx~@?T;qI(ST;e1exJSek6upH?Zh)azH+RAS= zijOFubN?rAfPq1D5xo_?`DMzWF}z|0V;J$h=gBd9KKt6H=>bCg9a@NWOOTc} zuT5vA{8x=7S?%W?e-`5jDF5nKtVL10hqPS+)E>nfLsi|c`W4sR9L~q#5&mg>1X{rE zl4zqgS4RBzFkC6SKcg<~lLFlrG_rKpPfY)kIJvn~b%BVc7I;TZK0k-d?uKFq{J>T- zvVs}u`W{RXm1_q-8g@mCv3%A~Ti272kNnYH+V4eHMg35sSwmp@m{`WI9u0n)V?e1< zL_7|4s##-H$mS&6qSk$}!-(XMvpP6>_l@IfCQ(okaH{6QgfxO#N7V62fYfhNMP`x! 
zp=?4(Q;J}WUKhYFmu-0;uoege-wc&m4}`ogFWkl@j-$BO{`PI7F2C(9m)`2ZXi8jx z&o4ycGNQI*N57AVHGX-ix79wycSn6nj)}70!fH5sxysF3KE8+|Nz}0?_N){W`K^51 zd-SuB_lIWBwb=j%RQCa~5jZ2}F83n``;YHVC)QJR?W+Ut{fz3&5i5v01i?92w_Ixkjf>|^h9aCT4ap)Wb zjMJAYw^+>*6D9V_NAr+~`8Sj8>bR|6?`c*$L#+`HnmrLsR=n{$ia5+x{uyK+@<%BC zlyv%u(px6iQ^8*`CywD@)mvVEx#8Wo7A-9r;Pcq0;kf#a1z$D-7%VmFgPfZXnjZ9$ z=srqq6+2gKPKl&;-v9QF9Y_j27~&d0j;BY%3jw%NnsM#SM;d?dtb^Y0Bi|_B6DDLA zN8&QN*%udZA{aqK|Z2AILuCIe8#ZLP}j@gGQ7sH5!gXDe}>w zvQ*xw3oAiBf|cIwxM%6=Af|{@m{cOHg zYt&VEZ)0qofiRPrQZ~0a`iy|NNXnYGcZD0*F?PZzw)+St@Jtg)WC|CV2q_nFpw?`D zkGs0f1xh%FS=rM~8?a@HCA86NSZ6QgZ35BwZ7UY@=jyy7Yt}r#5@7<`jzbmDj0}!? zXW5Ygw;?<_#Se`33j*a#U;9N;pG33wYx6E5YO%ua>Oi6#mwf)bs@+OT5OBwu+tzD* z&jChzr&n4mIJ->hUY|ew(OY|;^VjZ;6=xF`V~d`2fkenIGxF_p(6td^Sxe23nGYJo zv2&~EshId|up)&Z+FIejvFrw1P4^leFQ_?oYgwS$+mk2JbyX-F`C45CpxfUqLIYeV z_#Y@=uFI#8C=*oH8g+<$e03;kFt%3jyMAJ&y3xlzTEvkprkUP-l$d+zcCg1a$c>@# zyBni5$~peRQ2(&ENZhULT1hg}!-MRsoHW6(pQ6baQ6KNcM-hQ|lRhbvB4fXdF zlADAHpyd6a*V-0`sCcC8pozwJ)8O@!pUr~Zu_uJgOquZHLcA+p+gIUo&$(T^7k%UKtvq;X# z{VuN526G0$VE_x@HRuSA_4}(ltWDooVEb|Oe{-<2t8oh8&x6y3H{qoZ9}OR&`S}vq zg_ecqW$H7RvimZBVCli6vZ`8+c5g2%=>ky-x*VQzF&>TJcUGuc2@R`EV=##1){h5| z{D5MLx9YRqH~QYpEy8}GPUHMBWfz5;DtTjL-USREM{Hm%mj}HkSBn45b0)V`^DLsK)6dpVaT>s1}`sS6BUhUWc~0OTQrHFLx|>lc>-mtH%s90 zE@OW`UjH01VdPXvblW^>Ta%RP+%WL8G5&%f1{*E^WH;}qapNMXMOCdjTuSi9ChrRG zc3;oXXeM|@*@T%lfSh0$F;bW!;U?0Qe*}S}Y_bX@W&9D@U^64)AEc>~N8dXPIJFX?=Lq;-TwOQi5AiET=)iQO*adNdZ}y+C_-R%c zLo3UxbiS=G6c%yTltLwuo&A+pEL6D z@j;sKct9vp>%eHrW*2%+BOSLEmv?{C6x1ZtNA=&M*_u5jpj0L&vcy6j_IWlRM{5rKRlB*R5oEdfus19EhSer%iZ=bVvSbpSs|cju!?BIGECM!j z@w=WU4o=CM-Em>iTW9^mtbEREWr#fuiwDf?L#Z1^_)A}h+I=-f4Cy*C)uUj{{C7Gr zd(Ie1yoP3>@u|gCAL#}-U1HxuvOem(Z%fwGybI>7tOV@r6PW+S0+n>xn@_TmvTD95 zO%VvVcvA?`>XyZN8!`aBxv|vK(2dExjNe!RWMNC4EvH)xPRR}v-S^~AAU%Peq}*va zx=fd$X4{FDr?pIr%0PQ2UGVwu`PI%xKBeauYezw#92TroY!+CZFcNX+)pI7HwRb7w6;STTC*S*}bi; zI@YjH?CBRnzOh!U8>^DHAOpu!61<@yY=B4no`l`FGj=46!`$NxPrwGRc927nK_&v6 
z&^+Q5)A)9lbKItO`s`20fkkIJ)LM{}HD<}B4UMYvR@zLTP(0|tnq26FQiPIUMpAf7 zKZ%Z9)T>7^uLGlt4^Ds|ci?=kJfc7$qNZMn@WqG$S`I7P&5m06PWnH8*|>-|CIax9 zIx5B$5S-Pv!oL$1Hr5?Dw+Y$(6(ye!40}V&Umc(2pS>GXUYHspuCS!PJepw;)v)BW%P_jmo zDvE1IL4uvP2x`$!EZ@=u6)rx*r_dwEE zC4gO6alQMLRUnt8)M!w%XOz@IXKdR*3`ucpggma#-y4Fd_LIRlAr$0KMmpt*TykBV;d|@$2;OdY&}REL{cQ!dKrF2 zoCpE0GZBTb##8kgC``=ERs|`+@b7oycDZ;{J}Au&=iG@Q!$Zjc#JCGYHmSlmV9m?i ztc6^31hkW)9JPx{xE}-RU&7cJ3&6w|s9;#tWjiD9_`3b=Ex+lHf7&cuyg4#XDsXnZ zIhw3Y&-!JpJw!qh0=|)GYARNR*vCNfpYUP^X!x&-OyjQ!WYa9!H0-qqt>9(tif!DX9VSnh%|*c|zeAeJDs4o1O>K9J+pQ$>SanQR#6BC|g}v4%Qk;!T@|Yx3^G< zGYwfQ>e@}*@Ukmp-HJ_+3j5UoKqi~dUrp_XO_b`+Ph?joBCTz}_alRIVJ2(LW~CTP zhraGZ8A}ak#eXqC)j}1RN0i1s@GRzB&`#r{H6{7K$Tc>%Q0h4(@hX4Nkz%UP|uDuSepRk%yKyS@CNYRLX!G#y4)oZM4 z$bX_bUP|r0(ZlxyqxA3WX^NFIl^EX$STB=MwU?9(rLulvJ}$Fk-I#xZ;V3u7eR?H* zx0-lMuR(t16Nq~Kw6~fJd%&?kc|SiU;CCk?;*93p^o?q=A_I+s@BNGC?G7S(8jFe}{`eZHHWG_=2tMw{XHAVQa(* z1RN$`UvTkjZ>E^<+N^E_@_xZX83h}#FeLB#yM(3XS2hxQy4g`YSR3;z#6G>X{njDL z|9Zq}4iKgQPlCzubS1O4knVG_?8D2=p2BD4`r3%Z3lzZkZ)u5^y+=`A8vJiu*g5xq z$j=zM5*asFF5h(+zP23(yr4>{AU<)3=>zz;1i)N^PB%@LD!PF?LzDvAU z*D)F&4mz=yX;hVm^RH*K`4YDOP-+6i1e!pab=^JeUts1$P*R-@{wvg zyD?b57ioUq%iOHFnng-IMbxje@4U(o)_{3i3S~$M6|&#dSrJp_^hsYX2S-ZJZB%$; zlbWU(`FxMvQf5NjDb+K4?J6&>DkXF6m`h>CPF`^)sP|*5N3PmZ*Gr?pT6K{+7#>{U z@7C8Kbg4E%^Kk*gdKN#@&DJWwQhJ<9uc;Mf8zrLpBHF;U96bVF%kwypx7e|8a=yiJ zwSMx-qF>@F49CJu4+ZPo^To~bg=JsXRSX03E-dQ6ZedQL6tP*ulO;mvitP$swR=^{!G~! 
zclV|^2HY7AcJFp#cP*=l{~ufwj@Y*t*-x;3+9hSurhoQD&98(#K68gQV8N4j764RZ zHI`iz2*vgcTXfuRc5iK|Hg=z|pfo+UEJH#7U92$A|N64LMWpjQ>?q!F96cC%7T{N( zzWpf1RKBKk6(`=Zi?v`HCUYp!fN5fBdp92Pq-gOoFg*W+^eoyi0{C%(v-QcQX@Ybo zc6BcXj4RDnBn4ezf0~g;^0>(b-%T?o352BU2Xoem%?F$A3&zfrAt)4SSF=V_YT8uM zvs&7yRF;!UjO+n6Hqwe2OL9iUflw2@VY^ARIS^(~_dV%qXJU=7bNt=EwPMXbY1*0v zt+xt=RSR)(7t8Z?NNZd#FF;b{DQi@V=zs_}e z*;7&Zvl0YCgn+kF7IQva+2CHRVp@7#u(&)kE%;N6v}jQobw0Yy*e@mdM4bS=4nCkP zg02{xf+O_3yT?yd!R~n%Ok@3B3kWqE(=3LskEMW^xYEK=LV`oO7x;u&M_#1-8qU{h zxZ?U@lNQCcO+!8|GH5kp<;sHhYvqQ`s+Rd|gk4#=o6Qd3!rF}}>YKXsL0%|^H@i~&-Y z&gG3Et~PYPnn^!?Z;gqGNuBbVHi!LSnoJ2z%LSPwPSRcd@#x;r^&W+!$N~(3`U5NK z(@{tIPZ#`wWp956bV2oKe0#X{!8ZAvX3jny9MwJ)+W<|ss7&>`L86@l!4zJtG`lcL znRqbhVQHD%`lxzbH3>H8*O%kHhwk7TH|C7xV-MZh7foz-hQP43;GO+uN@wHGnw8(W zup_?G>EK0BJ_=emdpXB494gihU)I>zU6P>C<07zlj=!l^`@IbI;TX$vQ1vakGe)Xx zE} z#$XnyVqd_Do@x$ElV>?NR-p9qh$FJfRa_s}-Kv{zhq0o$JNu-5jL&GP_DXFoVdVa0 zH#3OGd?p>Gn6fzm-edcQQLEE3taSw$x}U^G^rhtLgui$Mk7=EL1i|(=i}qAe?G)QbuRt z^%wH;P&~$6dUMLBRpG|muh_|M|DIv z`EJG(__BRg7U=KSgq;0@?D2w(#xg(bM-QIQ&IExZQ7 z&crb-+DPtg^hk3beNM;bx@y1-;G7z+MKm7RR0Z%wtu`$pYD<2=Lf@1M`uyde~ z#|}rW*3B#2__81vMi6n4o+jJ(6H6J&PwvQHcG{QaLytJnt6}PP_hO;R3P<;49_8&5dYulO}IAJL&1m+=-r8%IUOn6Q~bMgE(Olc zyHT>Q);ZZtT7x}=DuNoWU}4x)Zr@DD%Hy=5Lsrc}uEc5V34|gMxsRUsiF2#60Xg6t z!%Q#?l~1hRY2_8<7m1$&=E1>Y)A@fq9`0RjmlR5ZxYF zqD@@R^BQ=g=!DI8{^WsG7`%bVwX8j`)b;b`b+3_iLH6wkTQt;((d1=)UI_NN#aJn+ z$K{z+c(lixADJxnHDb+pWoMeR*ym^FZ+#G?X<^OeRFQMEma-M{x8%{^Fx0pMkIOO+ zlJPy%;!_HX6(u*hU4HM-F_Kz_8^KEOYebf&zijpNFV`tmmnJ1Yz)i({0pLXIU0bSq zHB}+tq<|u3Y6s%029a$6oK~)8ik6b(;N(k{nA<(H@L)A3mo!p=pOGR_LJ*G{>cbZR zBxkLCU!i4T5zb_FH2)V1xnb4jo^%2RMc}?zeDNbIAHiTMghOESS+k zEUDXMT*+ey8YmSC;PeUe88ZhryhdDyXfhkidwC{0c*}57%`}BvbgD4xs7UVDA|sT#wWfo;#m~2udjUNxz8Y$CqsTkK&HT9 z??j!4%1vZqwvXPUm8Rys!(!pI`@KcCrIUG!EC3L5gf&lgwWi89)uyWs0{^>oPIPV} z%65TJM9~4a6Ay8JXMQPfV@TCN%Bd~R`5CpBJ+3ru^3i)^)GGp`_ypaNbzZ{IaPHJb zlO}XyV~&mP1QE;-FqNLUrYI@zbxH2|P9kMy?&bT2(aeW#^D7bV`PeVqa6jnHZ+n}9 
z&Fk^LSiZc`(A`Cua09|sXqRNF&tNTT8fzDQ44@(hli&a;U!$-6DTLnda3iC0G)H?% zTmU>82op6ttI}|d&aHU-j-7asemAbwML0=x68JtJX>jy^g_EYGp9?-5o0{-EIJel$ zfc;!+A+tQQXDcBkV?vDBhpk5SG{}p~T%hWJhzCov9Zy1)3kgI4x2v-wY=sN=3xM4Z zh_iaG=v+jBXJ3}$ls^)ghDk#m(^`1ZhM6gnoI-s1Y+-Zr-4*3| zK`O3o^hQFf2|HGJ7R<=;mKc1*V%bjYd(B?j+PuyW%J*TtJ|(-HjYd!%B7U2^V9(3_ zxy}BHxT^(>Lo@3zs%WJTbZD-M2z2^S{5u^8$PC-ocNK~J&Ze7b+GvNirGV;dHPb%r zNt{(>2o#{McYEcQb4|_$k>_z(SJzbqM?O|QE9{Ky&%po0I^iF`wiiHEqe&FIrWE_N zaG9USdrKH8R_yasIID?|}q^0bxfnw|x7vfr=Cmr|_U>v%}Kf#z0 z02W-QV^ICu0b^38It+a;Dee1-C_6BgsM?VcCcwi;o;3LNdp}|VO)qGXFI#X?8H>@H z-73XRJc*Xi<1m_>m<6LsO6>}dwNMWQ2BD35Q z0V%=s>Wtz)Uc0Y%+p~Myy?fiO`)it56b%lz)*8%ml(C!1&KPPD+UDQmrM~BJU-k44 zBN^Xjh>Hx2U+arq{*81!-eg3|4cFzD^J2j>s!b(1o5*n?7UyU=QYL{27vkeH3(AVY zW$~nD`#JIhbH$Dme#KA0w~WsLey^<|D2eEx1%BAnPk_q`dEg%7YO|!W06wO}Ki@xtowFIv~2Q~7pmN2AhJW#(hmZh%mlFPJ}E)UK?e=3t@1 zjmBF+(NIX3KQFLsJ(|)j24B)oI9>+!}Z6 zq5eclDy#ibvn6|GiHc!0ll>9P>}V79PRx4i9!#c*eZ-Q$75G@7Lr8x_9tcvyW)+zFYYtIKCW%t{Acfy-5IK2!tX))k4;Gd%D&Y z8j!U5rK&xr7TW{6VOv~)J-1>G_Yg8xw;4;^!z-qS6fKRNwV;2ERb2P2I8$0e8NVK( z>NrFp7nsHXzP8#-2_8-7S3HEG3ol*RY=}tsF28F36nRG0m35dc5m3=oJW(r7I>q1F zb=8L-cs6PRyDf-?vY+CGzHt_yovCirs;Mb?{sDEsrSpuH73S#8tX=Z4p(1M$PMRQB z&~7)I-hCtVmJ#(txFnr7`=-j{lU}m8+-#Ts0@X_kL;=4||J4j!=cW()IBOqZFK@qC z#cEnVOWXW9gQre{Au0CK(OQ8)Q=#yepPb5r{n&=1yZK8F^#1o`?E(n0lOk_@Oo&Y8 zuLJzRj*WBJ{kg>d7S$l5#+EWHm5;+-!-TPHtLuwN`|&thxxr3k!N(r~sh*wy7sS?= zT3m9hbdT+Od|g0|7{GP)x$Je}@UjO(;3WHpps{`*|i zYCF53{4iyYOM5&-44A87tKA3lk?A1m;tr*}W_{F_U4$>Siv=1$RHi$N|5&FMUQcCwe2h|J`TjdU5{FXO%+_6hqiz`6}U4%k@br6VI;6x65ay5Ba zJhOIWn`o6os6V|OWEedB)_$AN*C?1jm5)wd?Vf>=2X_47(m;)QfV9Q};%*q$)eFAh z$r?q}P%^}@Ua(SsQfjRF|Gbmbwmq>snQ;ibI5pjv<47COC@C$KDf|oYgn?+ zHrz(^F8aa#q^Td}PN|QTYr;pqcoxkA2$75$*7~*u$!?i|rw6lkC7x+M7yqdFC^hZ<9bdQ7hQ4*>#0^Gi8lL)O{IA_M-}WR{QCXZ03YD6k<0xhwSQBW z_lT}`7YXg_eoMq|Ldske4&pFH12;u{;(1lwhA-<+o{*>5dy*-?g>>Gw{<^EtOhC3Z z=vMWxKm|kZSGbNuo-l}yNTvb|m`sibGe0t&$OfeXVv{FVO(nkq;oq?|Jtkh821uDy z9QdLcp+OA`_y0Ovr;?LCX+1uCaeH5g>tHcc-LL6O0#tjBkv8xA2Nt%cD+{$8mzlqv 
zb;}mvMa>&1ME~yNbjB16O_5 z?t%g{H@z>`x&EJGWZ4E_(ag#WJ@Pfa8B!5>(ngZjbz~T$zg=@hWjNOLh>zd|p0pn8 zeR6fPf3u&D_0s8g&I@&cJAtz;yvA@CKL|QAN_5E5Jot&S?}!MeDwgQXwC%`qHmV3+ zb5UCI^7Pd1%{CquLl3=E9nNu@4jKnOXatC(zYjS_=Q{-!6}KoojolE|m?K6!5lXf) z8?FUJ?^Q3X&br-{QjFqC=W?L4UwG&B&5rG=lO1(|bB-QEfez%=^-s9nS&3U|zzx)a z4JtArh3E4hMs&UEnEKPmD8pmP;q?#Upml#SG@KL;7nyQuis{?8S6B7u)v@LR*)Duj zpHIu~$76q?kSd);n}`E! z_KC$eW8M527(@)r04tr;I}l8CZ{dx1yzZRT9rgm16ybJ|eelpq@e+HC-beo*R4|{r z7y+f6@d*d@0~`boMF6^E)c`V7?RI-+M1(L|6ue54JzfVOM44*;vG55jOv&|@a@t8z zN;L0)zXtVz8|PCn~2)|@qX3zuxoyvuHA$LG#aG$#C}!s5li%ToAeWQHcQGI z?wf5v(1EA493Ab3x+Fdx@NSpN-wlPI`@bjJQ#|pJM|wNp;;ilF-txw?#Sohgg@#U6 zen#8AyN^==5+>;IqMS}8EL7kTy<~GB?D{{4fe&Az3rGQ;c4EB1v9GDDjt)orHCYhx z;&l%zdNTnNhx-PRso;kO-DxTOR~-39@&)22i8jlDr!&NP3QrOOz(+?+NAC&9#U~G^ zgYC!fZNa66e<^qz1az6}#j^E1s*C3=ngI)0^l+u!gh=yYu1fQt{<6vThG*N5h;=t5 z5cqxaGL6-qgjYn@HB~z;pEiw1bvJw;NQk44 zz7c-u2dQ>esnB`&EM4(LB%fR*17iI4VBtf``9sFnt}rZODmCH(hhAK^n5^dn3_?%|oCLcXv#QCVxPcoFi^)a2WZQ^(loH5i_FKAZP3}5c@ zDQ-0On%e;|5LZL)E}PNXyy`}Hzm@f|21umFjP@(gAe!Lktq;Dp9rg@!L<#~|6McjD z72_iqJ{@iu%Kdt1FxIWYP}V;hcIo^>IMM>l2&zxjiM~VMfIM0Zm%43aygv433_mWj zrlpcbE~sK#dQI#vU*-#rbXQ6YA<#vPG-Fs%&TD`ydaaMzKwnF7bp)S2*i*WJ<-DwfLf-!=iQ&5dg2w=hlz*xoY z>U4dz5>ZF`h~V-$Q(q~++@G!yau$-`$Tw-D= zr?-UA{4Rtb_H19*$4HO}K01B1LQI+aq>_Rw({(iYClm@l<+l4DcIM{+LcR!@Z`&Q9 zOZ7~P0cZNNX0x7lH4P6eO32F$dY=j{jqZ8zH)x+vFZuZ-|F2DAgQHl<*$y_Q$`&+O z+)w@4+=vYtJ-xP8lz(`g>+?PZr3j@)^y)yO@#uJ9Qb#j=3ai+>9Dj%C zd)Lv%#}sBiDs$hIY=*o9%>(v%l7y3w!|?p^!{~1N@?Mvf}m;Y4X%&@|@nn2e^-Zz1{?~^fm1~e(f7L8^Wy!b!GHyb{R z*B5*KJgzPw)uib=Atk9j3%0b&T$NV!vC)c#jrZ+wA(R}_Kl5-jbp1^3@5SJO}DS39`wzG^HBDi{`E>6 z5Lg;G(umvDySBLygt!em##+csBy5Z4z2PMhW z5dNE;SJUt-U!@ra$-f1I8qc@U55wy)6?aSFflpTj`Fn-Av7m$Hp zK`QnQ{^Zg$Rvj}~5HKPjnV(1jd3$>zs0}PFeU9z_fC@4wH3{$zd_=i3!Q^4!l?OYY zOUX*u53k_b23tq#jnnU(ZRl^cia@70y zye5Q|DxdqbEW}wt(CR_t8nA+08{TR_vyj9WKt+IniL2iNk`4jY@SePsr&SO(_b&t( z*>EW_5q{z~<0ewC^T@81695Oc_wYwqXKWg2k@@F`DLunIZlYXEBW={||(f(e~%~KE9o+S*g)0x4(#g8X>!a(?FZvtQ7tK7oI;)+ZR)>uBDnFcbii* 
zMTR*=PN*WsPqW;{y|_s2ku1g6jWbQ6g#Zm_EeO#7>WU*b@cfgE;1D3~KG4WO{p7+%*9l!3TTQShV z3PPPuvwI3tQqIMcW7T%_ZLgy;#IJ@%|MQ%bmU?NOELzM_=#I#BZmMR~t$H?$RQ1!T zV+uOWEBQE7_M zc#-|4(-&^N^wT6rZmUPb!hF*)AFmby-WFdGxkg zHi)~qFjCp;B76&fg0Y3d2w>K=!YX=j$EKV8&1D9f*(^ndDTTt53Ch7A6G5JE4mxv$ z9u}$@#-PLSvdRszTG}c<4BAR=+Ehz1C&~bx1f9)265CjMhWVEB2w5$%qWd>8@S1{$BNboU88`#NPZQh2hFy0D zYZK%%Z2v>Tzkn+5L!!l8(T|vQ(3;e8lJT=A%va7GZWf zh`CWyvFbB+SUY0dI_{~RZC#Z69;ZnE^}81>Hz5d~D5LPq9a6^&53RL_6AfPi$y7Su zp8o#Cb1^V{Ke<@|*jB&Cz=GU~pfv-npuRsjW$}CCn&%35yEko^3wT(RwLn?>V>idl zPt=%$PDB`cZyEJEA8enlYZs#5-nHM}N4{}XD0w^yjz2Il>l(LZ$9y}$#1gHgbv-q& zhmZqZP9TMXsR+y_8XZR)EY9ctO(O3!$Eg5~`|XV#W`Z)%1ZPEb2;InA3z16eVwc!+ zeSxXNokj~BcNg+%fbGL`mX$K@`j0Qz9mr{u0WBMQf@VGIrapronr&zG{YDnxV~{&` zMx?(A_?@s?DfUDga8Pz0Mr4c60SW8zxFaog`)Q1z=7c+x-G)b#*?r@+iD9GDl1?-p zC{|{AP8CK;Mb-QUP3F!tE=}zfZ z0RidmMnbwvx*i1=|o1oY~vskCb0_EXAQbJXt{LQ-A4jz>ElRu21LuNKa^lFqPIB< zace4(j4XFC^EcHhlWGzyn})sFnVIDC>J-@SpYn3PJ|~8MPTycq706IhV+ro%-nTAu z|9q-U|2HT+!?h+LDnCs|M`&BQykUz^>ptMsi`e5W)noG$vM(SsAJPq`vj?ww`^$)9 zw==oWzQJO4mfB0Zxh8|)ZWWHOot^OkRB5RKZcv9r#>EjKKi_ehxy?KiCsY{1!dU66 zaRwcB&TokXv(F@nOhm-o;%e7p9XV5~QRjR!)+UF7$?nI!A-12jhbj8&7=5ZPy^FCX z_sWJlUZP(elTY~@5|R1TVWD{5WYOX7?lrlsY)Et;y@d%G>iya2rWOe7d?&*?af$%M z$m$MC8I#cyo3a1MeNoD`~jr388HzY3#gWNU4&=yiQm7P4fPJiwV#x|GCEi z@e}>n7C~CavpOB-v+v_0Vv-1v!l63R>Xra^^F?gWa+hIbwHn<1Szr81Rj3aAIvMr;^L`m z0EZj~Ar7~WM`c_N8JReBTrG4Wwx}{&N9ejfVI(uYTpS%W>et=ZU@|+W+$rA70=rr0 zPw!~(UlV>LcYQj%%|USa!DJGsIu(5RP@uMLH#?*vfBoM=MuIwU1qpfI zYqJC8X(mS{ifyB%5FoNKUv&oZ8tyrZr5(h8cA&Z5$sG@DN41@jsZHJjY!3iL$G5r# zsC37-_*a4{t)WzZCzajd@kbK=Wjk#;r;OYfAiPg+2{qw}a%!kw8@X&zW%uE(;<@^% zLNeCN1=a%Kg^_$R(e=t?1@7ODa#*=VGhV$Y(dk%+5#GXOnYO=GWi}r_oh+T?@I};P zg^qG|4;-1!kx#9h*F_T#>rx2n_R{*Ot*HwHbfPVh1rJy|^O4nVI5WRgTzHQb4&oS_ z5=`D8Ul$l3Nn=zq237(iMRWac{2!B1UJk%`fACwax{5ptUk}OORhTIP3(+v{Q@~0Y z2pUdn{Kn~N07$QyzGp68XAh)o!t6-?TVT-vP^Q5T&J9VtkEe>n= zcsx1$Pbpufo(oEE z)Tu=eW24ERCzYBBViL`*-{Z`~P|Yd86?GANr0jno&5+K*P?UF?c$AcRSomeIqM{%w 
zN?{e0>VerSVEa%Rn~%kOMmbweWO5*7&Q=R6k%>mXW^;l!dbK0S5X^f4()i-~u}SJCQojC`TV+?fX#MHNkY#z;2nfe;rW^eA~v?i06p4-;2&ZdmD_*QSse zLv8E;wzZAIE@NN{EeUa;bq4!kTwDJXL5!de+axY`3`2)OFfa0!Pu`%LVL{rC1aGL* zD9PY^IP4ou>B12cWlsrlm}$$4NW6Ze3CjL*w+^B&+T7D`smoeY$X9JfWQfy@YqhE> z`}-~nGFXKSu`k%-f|N+^EXOLpDY6umQkvm18_3`WTbX%L)_)%s)R*|eAN&o+Cq2M5 z`Av*7O4rjz5Cf~zF%c}HW&W9AAX&@)9-{ga>6}}1>++5BAQ=T{_f?gAANT-2vDbFy z21I6(&3L)yW~4!XjX?;iTs?~@EHG7O2SrLb(CG=^-2VV(j`)=#KOP(@00**a=l8}6 zJBTTJb`(&2Mg;JzN!R?DT&I4--pCAndy99c9jM z8_8>NfBa4GPy(RrOn#*Bu7bjZdwRv>K?^m-X_10>cka>>=(WG$_fdw?h5}VN9$dfT z;?jeddBh|5d>7MAD1e7wv*?gX>6VaENR2i*n!x3>na7(XFWuTIUnnz#`Klsma@V48 zsWEhoiMB?x3Z2da=2_c`j7uDL>{1PkEN4CchkSPCfP!vn*lkni>;xJjj91b61u7^oAS8P`WG2-M-uK0Z_=3E8SxconyD^0d`H?wo_tuj$2n> zSP#|%xZ|)ozTJ#A6gL%Jo2O~ebUw|{xT2Q5h*kzwB_ysViCoN)7@1o-_n&?RmcWA1 zw3g;L_L4wGQ;i&Eul=tM`zBuptHU~z$9<7RWn9g%G>qFTQqU>+#v?AH)4|7k?{12< z-f@tiNNR8F<^&G{6V7j~m>+tSHf_C&V#Y^-91I)41qvyzaZ&$nfV?Ww`gCPHamTL_*mOWL&k?f{k*Fwq|vE6DGbK zf&hFNtQ}iXC??<`kh$;wtNygs`X4&w!O$L;vXR-v(V{~?vZLr6{Mbp*)7BAW!-lyg z=P@lC-}TLa$Dzf#=vdfeXXUQ!&p zazTn#Sc7*i$REDSnPWEm^oazhCk5#7sM&lPoN}Fy08_SNZRLnr60AF6!kiF=m9rVy zV-KV!f?%_jS{D3M{Va~XvnMN`Hm~d#puMkO0bGoD^EZSI-oIAHuj_psk}^r+?9qxUY-+Vh%gH4czu}47G8O^4UJ!OI} z_W>h-9T}0}R}ZU8h6nLe*@mXiCv|^OU}6>l?-eEGHJRc|E*9PJ)JGK=*s&p8U||g3 zm@&kU3i*snHUDRIYxP+?Uou$oD`-id6KQx#F*i}b zs1IejSf9G~p}z{=UJfeMB# zFALba!4QL$9$0E7YD*_hgnII$A_q=d%N0SVE+Jgm-FA9*2R!&JQxzN4|4#whWq~}7 zScS6R7ff>1#L_%3;Xn0!OLbG}iC#=3KpcNMkyu54po3h2-E1zAg>JS;V^9M@5sSs; z?HACz4*I*gd|-otR!`K_QqVN~qYZO4@K(FZcX8`<6=rn+1V}zp-i=jtPr1X{sa%O0 zc-20%C-0|JoFD?^^aDL}RUUS>8F(KlMH;cVed-c~@+f6lpQITEq_-a=lv}P@kqP`M z8-x=&rC)3OR#XON9$({8za97Bkt9aXq zu8*%A}R-<+n1dD!=txI)()BpiCbyg(HM)*~Dp8i&Y_zwcew z2wS~o+%nR-^;NI0F-Dj{!IsR3g$ zZfFvP6sU0cn|zYCNGr4RJ!a(u76Vurr5)o&+JT)x<=>O+jEVBP{KX8~8e;A2?CP5D zX<|cbzWoS(8{>Thr}JW z?Cxn#lANB6DI}Uax5KG*;n%R>vdVmCkhb?9j5wv#hWV3 z4yG%hlVkUr0iex4?EetS_z$XB%K_*UWT^{-sij><-X~Rj3&YeUg??zzI%E3JB#?SM z$}vfjfy|Iq7uhS{mKt(fsXj0I=hM~GO>`*d0@TADD*8~0kscICs`oR 
zGou3779tJ(H|dD^G5zJkBR-sh{FmmLb9W()Iu~xpaxkf__+xV2AH^PjRaJY8tU}r6 z)17O^?vMKb(s(##yj&A`yxbaWpQyrs3L;T8rNNc!{lyFZxsD0}kdDX$G^7GleVKP& zl%+b2@Om+6Q%m!J&~v8$X1QK!D>HXf5#@h3owNEZ&CH9 zI5>Kzqg1{C5aS9M4d>F%6`4Koemb~O-3DCrhKU-F&Eynrk=MSzC_(R|TVRN*9Ji~l zy7JcuM0YHIQ3%lbaRchoQFSQH(7YPFfE0>1pW*hoe4ACoG1>MGoe~3MQg}hAuO$7i zChwni+|Sbo3)SvRxKYv{hYR<&w?*mj`jT?Z5w8`0cGE!FKC-3nZoSCPOY2sPkKkT+ z{jdnMOtoVb&7w@(Y+;*QtLb%1kq;3}Uwc(1=>;NwfWkd6F$4sy%e5MB8Fn{A6FtK< z*uq8rux_`xusf+FyR7~kDish&aHEg07~c!iK9@_0Q_J4G(B{cnYxN~}oo|3UqTK6Q za20}AYayvP<_u%iHB4b+J%0C-rN((M_!?qIk7^t^P=iE($2R8f0P z=A*pmaG?Wqu7Pi>BWs6rMvFirL-@q~Yi91{#+-Df#@~KMt`Y@mOw+Z%#IwE>rmRNe8 zjaYhR@O7RqC4)ldhK-}>IySzp0Mk5RR}G>xu+DSCtJh%(g~v)*+r+o>+b<@0D|WfA z(d9Xjv@^-82cu1nG@ZW#52Ht@$??(?V*ejKeA~-ML`@%O1d;gdNTrntXt%^+IIR9R z-ncpD=kcIe|Es7vv0X)ZXVR?#W43uo-p^U-Yp2mpLbUi*1Sxue$YL?+O?hGA0S}_5 z;hx9X`1of~0%`ZTZ{|c+TD@bIl(e8w%hy2Y+wOZ+KCi}5f$T@^B62ZpoXpA%&)3E_ z@C-7W$j>OSb+b4<9SZ(xk1iH{RUX~%qtBfrFr&MD%5rVFWNyguN~2v*>zyN3>9253 zj;AX45ez}EdD%YWJL??_qDLFs-!hZJp3BI_2Ca4GmIcru02&Ks|A6p@&6O>kPy@Mr zyvmg+Zxslp6sjUzd#1c8(#-5=I3BpEH}Gvxk|~qdz+*r2=pMiv1ap)v#IExx&K&r# z&5$)`ZR$|afFkr)8nhWK%uZa*8tZrb=e85D#s~aYfdO?6*f?Pf)>w031n#w9RXYov zdmmnWFvj;U*+omml3NB~QpHE98wWa{{H2rhpW?9Iv=9JeX87-H@&_ud44wwR3H{B#2@mF5 z@19xr8IDo>KrI_AC$)Q% zGqm@AXk}vlM}A$+1Pma-=Di{@Qm+XKe5Um4!R=&}u9IC-nF_Fz%G|c;Y=?b+mibbV<+w`Chpxs9N?8Z{4_Mf<`nbV_!iWc-=iz>O6aSbJ3 zE`Q}r7A>AD z>2cIt#GUlP^fLA8IDHC;yF3Y?MNX-*td6jMu%0sbPeD1-hCAJFLCXXAl$R@qp$TWFWjOYJ4#Oa#miB7v^!*~LZ5)a9#e z)YHcEOHz6xZ>(Kw&wib~TQJ1~?(T=vqyzx^tpERql3;|FXGnT@Kog_FTpu zis=f%6ZKnM#0*|jGn3O38HzSmpKFfp?(Pb+K?dM31KYrTA-?R+{9KZC$&f%1&E?@O z5%hWP>@t4R&Y%9>ts?tr64J2ti05BSadK8PIek&bo?~%k@146k43ke;_KdQ&$DV-( z=;yZtO~>OmluuhdXRj7J=^jFPIEMhv9|2AdIDZ4hSH}d*EYsQjkw?=_3tFQXZI-rV zOi_sYe2v4`HwYFv@$^Y>ev_Ywja9-qUQr9o6_5-D9VE4GBxvRqTBR8h<(ly0hJ70d zWaPZy+ykC5jhf$3xLPX@^3Iz{If1)3YjmiTm)c)K(D`#lrt6A}rCy2XFqDSR^BtzY<{M*I22 z3HJFuYJ&0kCqg2z3s6*nmI9Va;rz%+WtSz7AdtcR8hw$?>w#bWpm~ROW2%edx?g#r zl|JS1Gi=+qbx! 
z!`e#R+L{sEZ1VW0nW!dmf~v?ypV`a7?O)-^`IP>n-Ag@zqha&qR~+lt0s^uOlP18D zMRTuFDvc*$M_e*lXU`K@(Hcu$!Ef1cA8DqM?JH;L!^))@1aAP1E&=+iy@SE+nP2rO*oMci{=l5OIbh^YWn(B4C$~C z_m#o=*PBGu*Je=>lgW;Wpj%+M+ejp@0xA9O?~g$6zi`H9;9p8h_s~{#C-4Ix;2Q^ z?AzLajY@^XWLc+V#%DQ65KkMyY;R;pP=@3~k&q0e6j|G@UWyatq8`X~%qq>LnXxos zXI@PsxaFO?ds*E=m)$9t67|J2_of~?>3nftvT%FwpNrN!)evf)b)%r9=mFCMqLusM4{n2}6YcJr;L65o2jglC1= zsDj9-BB`P`s%K4;M!~Gg)@R=dT)PtgciPF{%E0tM5H>+5`}pk-_fgI3J~lZ!GYnuR zl~qzQs7d}4eW%-phJtst@x$b05XgoBQxl4)dI>kCmedtdohg+@?;Te#c2wXQRC33k z>w}f^a=S;A0?c~QMF370%G2)htqE70hFcFhmOmGAPx3?98}) z3A~}*R~#4{#MrQ;FN-v{k8Zv6#>+JP!J4O=-ZOWV1O&D^$OYl!!<=Tm$o1}+FW+v(J59m>G9k#st(|S{9VNn3Rbh+mQ zatlODvT`ctsER;VEVEib?`HT-YxOP^~k_Bt!Q z#pGMk>ttZPRlN5e_<_X!u57^q@Z9`e9r|AZ&CzBFdRm_IB^rYwuQn3&A)U<*$noYw zAQ*Ww*=6T~reyfRNMv#P5pLH?5FjBy7Y*z_Roy#8D#)$8YFy%~wEBBpWvxE7Jl}@> z^=Gw%WMv@%yLR-M;qYP{-;zkBfpk9@D8TxSKi z9IEx$sE`vw`~W%wBMyK4mgDdf=R_2RvEZIXstWMRf%cAzF&rlRtnAc z0hZLYS+zw&aJq#|9hr{CJ>xvUpwp#kOs6V5IP&`|P)bB^B!cTqy3gGkrg! zBLv#_ro73n2m8|$D&nDr(-=89;S#b8eG3=77SV6h7kT*A5wK^1 z20tujIuRh7f;$Fp1<#F*NKv=s@(fQ3dH59Ob{BeQ#>%G}@ZybzI}(->9(6dpiDqyP zP<#{wnAra|T!Tg3bo~tb&7ogIC5_NnG{yh5&VFw3ggf(Qekv-HP?BfCP2z9xp^&Qk z%~%s*S=`M}{h-y$hU3R7*R2zWG}DYWUuX66g_8aXX6+Vo%8E3EPyVL3rHG;R!EO8R zO$gvQq1ef6=mKV;(DwvpynaHQWYpBL)a^h+la~7P=NBC7_L1qoPjHqJn7V4$B|hx! 
z7g;?P9Db-{(#(J9ymxu;b?0!NZR^}@r#^AJ?Via?pg?7q0%b^Bm?3TlKAH3NZnQbE zbQC^kZ@1Xr*79^3+vJ{*y~tzX+1uNzFoXuTD0^(f2l~sW^88q;VJFQKk^d?xiFx^F z9Agqfm)z!^QZlB(^F4rDyUwYmt&m93?>sv@T7&pLwdzoY3@7p$0qtIP- zv_V@;aQCy&o@Z?GoW?XN42p3Kbj6mq;jA^n4SO%EGW0b=9NTBd$tX@DI@j`q@CQk5 zR0>3z>anJDPW6a_qhHkuMnp#snPf?aN_rj(@eFn%T0tgCgp4hrsYR!yWZnK*d??*QfW;Q^1P55#xr$TF{D)Qr_Ir} zAJ$JhrJ0LqPbqGgJ}9d{E~E4%i69&dDP&MwS0M=7Hlf5DPK&mCI8}G(ia!b}bO(oZ ze5>;WS?9@(?y)K=?4Md25LMEDiM*-0e6DyK><&Y@4)S6a}wfrejC8oA&Lpn@jS=3 zuoVt;qcKE5{Ysf zMA_FPU_cYhK4Ex6MBlq2`5=vTc@5OF};EsWHmJj-j{knHZ&)D(%?Ar zb1*1Wh+@Pj+g3Z)u#128uHLI9Kj<1oRh-t;*5>5obo`{|8PeotVi6oTdgRkPnzXMo zx4UK6fEo-Y>Fi&q>s(EI6rk?K<&qD~E9{+RGHPuPi&?ybYuak&rM4wGe#vRwL_uzw zh^DKY#efUgl!tYs!?!a-rB47U#r)3#L1NuuWNcOS_9A(T4u)ns*&zy&*<5cYZsY1+=c zkd?m!l5|8Qf6W0ZQ2&s^=x{uuR8R=7TD3Rz=7ZfA^J;$*Ncq6x@ZY^9;V6Yg51*{f z<)^3UcPUSQ6SFyEP)|>(%W6-3$(RD+$#_CQiR~k*S>ixo&ziK1Ibp_IZ8!#k|ZmVf|CM=^#gbY`({iCWS2Ias=)fH|iB9ruS0BUX<(wY&=PSxUJ&L5juf8nuL@T z64t=Bh$1IN4-Wz1PiLA%|gggzsW|B>066c%ux37Qh9G{?bf@+LgU}+%;2Q`I29@b>AR;`z=qMsG~ zaYm9YBYcSSPsz^gjE>IwZO?^E`hoK19zCRdztS3~uV13=C=htBJl?h|q-_--?=tll z8aYNjQdQV>OIT4U>gx~ti)P-BD`c6;6XdHam{8GZUugEaVcy)#$ozWUmh{^v@qA3> z>x|lvzk7cI^K`i(^qVjb7G6QJA1qJ@8Z^hvzYeatuS5yt-ezWLa$SPEyK2!l`qSZl zd^?)=Xu$vgE~=*#->)REWlk4Xi5H1&`l|U*zYE}Td&x(^Gh_BJ0Bq-@TBYF zTs3!p5uT?65^F@_NkX0vp(6byzNhSW*BetfF*FyW0hx_5Q2riJ(2I!$T=GFbFr{euSHC^dUy-imm zU;*HuA%lX^{^||+qMKvP>751dWH~*pndsPX*XGTcKV)yJP1F5Hrl`CsfiLWK%!i}S z%W(hlu`!0&n0#d%;vdVNEY%e-%`efTQ-rQ|kZo7#b`u1CoUvwLT{zI$AIjiHNTqmS z*3zCUw{Isyyn~5}xiwSN>mcJv`P03_W^?m{Zk-wpHND*47e>=epb8^ETxl|h8gozG z9T;u0{B-&&PZG`m#Rjr$?faO0s&{K6`${L+qy2KJxeP8OI!KA*^22BG>nF%w&mWi| zieD%@lB<}L)KtKdgkcyi#Cm*&TR}}Ea7pmQQ!=M{hWuJ8Se37JtBO^`R=B%t^Z02% zx~>rPrXAco47wn@qUE)fL#f z{gGlYNLMJG>49gY*S=}2p1FVaOIo2lDa2;J9_Po8AId!aX3$_vjYgM`ON~{p6`?=r zBxYqy*Z8*)zScmug-Oy8MJe%IgMZ3td^T#tnbF#O6p_`O3DBeoH{bq2CUJDcs7o8?lp>O2M3 zfTbe|x^34y4-)soMJ;vQ2a3&MWfB`0v zWdFs5vBL^Gjl%g;&z)7ICqeN4PrBk}D!PKi&o{R&Mrx1Ef5tV8XO=ehOv 
zzOmi9tjXv2m%m?r2@;Z7tTuzVM5XxNJ`l{8ThVVO$n^|TKD-D=JFW2UVIh0%g^`Gd zpYuj3+Y-%5KG1L**xuPUf-=q4ZJq@L74Ul;;lnwW3^cKH7`lP<{n%2B&L=$IGnX2V zAP?t!OB3ILTd)Npp;X^PZ+BNwPjAt+!9e@( zw9bMn8at;o?FRR&y>vDW=tU|-S-8q<=Lfoy#+R78CaM525iemZ9ZP=_htK^gtlE{> zQTa4)lFVAcJyxz^S@gKVbTr$+TxD!1<-2SxcLrXt+iG_w*CmR0NR-#$R>iX}1xH`J zCki$*AI12BYPkIxPt~6*eE5UlDQ+5{1KNojd>;RqIJC@mE==IC37vQG?AIwgw`}RN z6x(YOH4a5iqxhfC^5=f6*oinFWQmM`+C-L>va7Fu3A1jlJn@sd>TjTlSB-_Fe|?L# z?|ut`oFU&jVHV~YL46>6mCu&cv23h-NQEMH z5A+WAp0@ILr;)hOWJkO($&)sT)RVAbFC6U*ABqP*EbG=AsWf5lr@^JvtazeyMCI1w zSKb$W3Ju6oO43~C^b4avM(eLil@MlMr9m0FaCwie6&VfT;BO#oI40a7Z6c;ka5HaUoQU;#{McEJeba9_To_vSr8A{87 zS;%kn#q+Z>7v|+5zXV!(Rc7)_)i=5jHLZji>Yke<$M%7rbO?vfw<3C0I|$9oyCKt3 z=+k^sIy@g|KaY-%7VFV7ID6^AGTho}w@DUlb8LTik#9x)ap*Vsw#%I0M(_QYC5%Zn zjW|`pM%y~{2!f#l-ARwg08;v|`vIn!(^ahw)9IJLLU1n^uY$Il@QZ?;rz{F+AFTg0 z^D+jeoTKr)%cYO^-c4ouB;a;XLC_C{ld-=tNs#3Z8?^_Rrnr>nQF>mIx4{w-0keKk zUs;(5Yn+*x1H?2FU;o#NZ|BzCLyP3gR*&2dVe8?jwu!-ivGg6|pI-OH` z%K`ZlmBpVRuK7#8mkX}B-@m_LjKIq0uv9D4TUfH9v1aaP82r=W@{HfW%weUyHH3-9 z%i%_ENdN3rjQWo^{a=gA%6fJuij?&9d={&xdMUosGcxWSUFl#o{`NE#S&DPGUHtU_ zq^c42D57HG1-Mu1m!hip1u1B%s#I@{3FjexZcNDi9p#+XX>h#_Zn_5%n|G;??Kg>| z96~wy%ySEii~cASvyr5BH)I*bJ_LJ*H?7mxo3^i>iQ{#dm|=IXS83$w(#-^1tGa;oCOtIMzS54?d}LHM!F{u@PCkTsoDJsI4;l1)(71Z-ZFkZlB#B{ao;a)>Y3yse|sshz^ z`qko-p|q&1B7qbi4vGu7?USRPR&9rvySQuh)zpwW(s&ftBrSEV_SI%PN<@gwk|9b? 
zna|Wa9nZmdjam~NGkk>}@Wj&R{D;2B>f485%lTM>kL*J4fh=PN!6D{d} z6|S9(R7XD+Y9%iB+V2-De8k(lwO=dzu_d%(q06}_JRVv9)9|n<-o6n7)XsQTI zNv!oV3zqP~wTtfS>vJIfg85QZ8I(wcg@rB3@;AtqlM1ma7hxEZ<0f;$MxuDB8y=c!l_U*=UR^C$-^3HwGyo$5L!Vkrr=g_H9M=NqN(s zg9=gn4^&7Xn(aL{nk3nFtl-~wGVHIML=R9z61dtr`+0NY0c!zBd2Ktx@fkt3jt1qY z1MwNW3pNO}-Pf-E5P9uSR8@xInCoii3+jYpiutqh9E{zLl=3yJ9Uu_zyRJPe|B0ZO zP~2WEeBam#Tt96V8t?%$*v?!xze<+h!`P;$tQ@$zyNixw@nNcQFXSo&G2eGEL+Cc= zl9hlk;|#&OwtqUB9NwUuxp8i!viRquWjfZ~kH4~iCULs*85oorD)CYh(g-+^XSiVn zjDoE@;-EZv!NNh>0FPFJD!*Fhk5!6a#f{{3ycFeqQ0K9z+>oKHqAK(G3)9{^FE5~Y?^ez^>Lpvx!O=P()K`YPZ4{kfwY@eSg~8-N+|ypxqx($Y$C zD5m%cx{LA2$(-6+0#L;TB8|Wagz7~Zn!Tpw6go=rTbMP|RP$43i!1YH>c_3$s=8^a zPeIBdE+#(jD$RRSzh;*JU^^OWuT$-%V8iV%`0gr2qCf6-E3BCG8;IidThVVXC|kAz z7s5T)IUuHk=aVuW<_=O*JCLDtu0MD02^+@p45L2Jm2_-y<1pZ@q!NdK_Q8w=PcQgh zy^~}zoA@#Cz|QAfS;uuS9vcx3;4X*07Mywb1Z`{1&=F^%;HQcGesc!_bEa(@Fcd~b zD)_})Y($5Vto0Pu#W3^Cx|o_2bWEZ2Nv7J)?_@7Z$s=Af_xdTj|6O91xc{YEJM z1nxE|x!6)I!m_t54<3+$dKngZAD{sI^ZNl8LCCOHL(UBSYJPOO?*V>;q+cO|8N7TL z-+O#Kbn*sL0Y6DbKP{?1eK(xX%)G7Ec3n$4Yt%1GAseUe4qN1P#;3*;q0&Z4?A|2v zES{!p5>r{0R(3q{pWfcA!+EvwdOFX&f2D{3iDkJXPhAdp$L4@E(Y-E^-1NH?UUA}; zy>c+R$^02+i_J`>X?1HK%dJ}=iCjBIN;@*)v$K{O%}QUbUok9wK@XAy5ycx*!fSlG zC;}SF$#)1QyL>{+xSzo;C2`LV4&uOVp#nu_&n}TJub6jnxRBLv@6u?`fCHIlP81nN z$=sxnc{s=vGi!%)qC5hG%x+h-OZ@@gaTCbmI{&WZD#XwBx&FOI$C43!aq)isA!f;b zC{eENJI=-2Cx}9xtq2f?bC{`=y*=ThjPhle390%sLwt@eBIcXUC6Mz zdzmky(tjxZyqdb^q&H^iv#Bg0)>Y^5!yxlj@32qU-xzP~a3+b^4U=OfvQNx;>U9TT z9}R-NBmOegz)H5%NPTX2nTnh4jYv20jbO>sWZ#LOYhA)Pf08Kktw$8{OgW}Ad~{g$ zwtB1$&GUY=65!y-m;WRvc6jJhB;w$pc$0+M`3x1v%V;Dt2VXuByxWe`_+>=zqznCH z+%rbG8Wt1GteM}@YjAZ#UME5H;@eeSUBX%z^Ki~xE@kyC=VwFw!Irh5#n+!$STlAp zPIID=$;ig3IkBNF;*wmyqQ`2y4_6&hLg{2fl`TfJQ)VaeP<`rBPUp|mOMf9f~E|qe97c3RvkKAu+NNBea6Zy`)|?Z0{AF0C$dhkEOG2VxuyV)GW=fo6VLo+sNdgO(kJn{sb2%pWZQ-9( zDgHgJFt+#r?q*Qf%LQ8Pgc^*o@)~Yc;)4o~7(gpg2ra)Cy-~_P-lp=omSR5m5nj7f zyUl|o8{vPTOn?(bBcR0OJ!)V!l3=QS3L?+%J6d@3cy;HqRG#=%zrh5D;(qXm4H)xg 
z6{X$px9d~SFB($fnNX7)RJ8OYQ2p~Wq!207XvFF15b=pbTOanzEA*pUR#E1J_hH3=ww=kMW$%&9+Y-V|eT7*;t3S^|sG{1{avw6f zl#`$v6>R8jWxN?FYrmH-@_JuXK7475+$p{alv~7_++VFqV!D!4CH_5jrM!1$@|IS) z^&@d7Z6|q8uXMtNx)9-2=%bTc5IZ%5X0>vsuP(xU*fp>RoU2w}qYIC$yQF;Yi%vk8w^H!#qt5AJzXN!5o(6SUmKmWD26Na& z-RsG8vF)L`dY$SQ3j4A!l8^t>24R- zi-*J!%gQ$J|J~}u)Ake-Ol%p$5z!YZKpd&#iH9m(H;<2Pc5sRnUrlis#z z%gneJGI<-!)c?g4^=ndJUqzyn&v{Myj)+>nt+d2;KK>hYhDFWmA4;ueeDhDGDBSYa zz$aK%XV+NXUs+62i}-I)=uAZe@>f5jR>iSWv>Pf;mx&Pt8MveZ3g0N23RKamnp}ya z1tQme^nNNom~W6`W>t_*VvhjH7yDEGBmi4fOKlL?e4Zu&G3|zruLxL@=8lYgJyuxV z4$c>|jJ~7C|4(%5HDtgeVv+wCn&W5uY7XbPU__NTg+WaDMWb0%QePzT9TIE$K^IgN zYbCSwniV`#0Nrb`4D~uwlZo-FdNxe4mwq!tQNA|;RUTs&Y5}k9mmxkCJ7UF6ckl4l zx5Q#itE;PXe*Hp?x!c4VI^`m?@_3#6KktdAT#$jhrWLfL|F{8fyAVSnE~^MvMe1`J z150E!R-|@nboMJ;XR8T~97Hk5#i78LhiWYs_J%L(T+Mys2fkOMONKWurOJ!5k)EQA zf`+>X>!91l7sFG}7p~+0kj-@E8h3Pf8ya$l9G~lIK>sH^2m2jgc7pGD5~ac3Fm!oiX;1rLkn+$r6P;p+{w3vsOyhjO>((_hz2=o;C@bsU1Nu%X+v*1my{(VPb1z)NFEp??~f@yf$AV`ymnw$49$%KNCl@CP2nV z_!Af`O1&;$zF+TyyRon>&$4a0H4MYyQJP=rmABUi34!^|7K-UPZUZaZvz#U~p z(O0CZ@nrqpc`VaKj4U{pC8%kF5FT5lk^^CS6iFDA^11$JTeB@(Yv-q|GT-+}BB?5n zGV8h~{2%;1rS%|$o&_K_$S#v30YtqoxKxj~|6sjuFqmD~z$9f8muX4LKCo0YC5XG1 zfIMwmpdxF*V){BYzTcQ*Ttl+rmgr5{-!M6woTi9 z9hBIGAgg$<@Kn^P!nxj)e3yp3It*-DOn}ss^VN9mE=MojGK8}Mkvix@YFmeg7hV4{ z9t7%o$&2R8KHA%j{i};(&K3!Oz|bZ-C->NtKK?_F+3o^|P#!=sqpe#qcO+;PMaIv! 
zZw7;!U+C6S4;eVcj!2L0eII-@QC(?wSac(#k+M^>axO-l>bkY`$9oTkj{ze1X08`Y z#aB4@n!CT?Jnnr7ED#=1nd>LXp;8a=p?eUD(*1?_m3_R4G z)?7;y;;b+OJKJ(i8c&>Ngi}{<^z`9D;a;r!Ooy<5!hUaWg$N_;2PtV6u$Q3EhMF9a z#F#1Dnm2V^PaTfCW43-qp85j(h0FH|J+Jk82Ag8m!^(38D)D6*q8@1?z^@01fgI{o^c!$zmJ^49|y)l)4Y$(w_Igw$V+0LmZFct|9??4<1nP)hH-DVYw z32#t0d({%NNjXGOyQRnnh$4ED!ek^`nZhiy;3h9rOOM74QPDWjew6O$GEri#7`#*~ zxDoyl!T^R@Yh(-B`i zw^q-HVD{@iP1qh>-hOv#Tj^7_CVQ%Urln-yB>-iy>!kvxC8<)NXnb0!Y;Ao~BUc`;ps;n7&DUjDb z8os$i>+f#VV2i$Xb;A204G3_Bqm%4f@2b^P5$&q<8jS@$CwWa$*n(!hcpnR>7SCmV zV9&m^On;w-$M#c5pJkPmIrF;P+xZ5|xHAQ8)EuI9XV6pxb(Z!Ma#;2H8!fzVo%4zn zV?h-(v|iNWd2I>%>930{!1u&7*;=~sEsl$$Qi01l!P4x=3t(XN2}PK`H17DK@Nn&z z`Pw32Y$?aiP4lbMt-$2&{>YQkxHCBP&);c zU{;Q0hjs<11AB@{>10Wr;kAg88^W{;W90*hWz^%*9UrbC&6}5#qFk7FTyfG7BW>3Z%AG~ialB!6+pPJ)x^8v$`0Y5m~ithrk!m~x=xhfBr zNkWlSNoZ?O=9SoadWTPpSNM(8?x%!GdsceAj=E^GF&Q{h^6w-x2ll&gM{0A8;^=zh z@1mz0u8~$*dEIsK{Ay|7T-UGA3hPnZj^bl~oL20gSP(GoI@gww68*H4dcpa;_%Jp;K0Fe+@gfo-0m}w3MdJ zTpfqFINfl<_@RwaDV_H*Jx>BNHN~I=&%{A^ZgBx#p-wyZ`Yw2064Kv8)0Qji4m8BykOs z4eq4;TuK2YOS#uprFtd;gh-W+r%Ehu)5Gfg{_^`3-BdfcoHdjCFZe>`QF3ch8?v4% zK`VNfS%-Ft)iP*8QdP8v3p>PUbXfA7(gMsP>vTq-qwJU`3ONmqhmyF;8jH+XVHlRd z)PNH&svzeK*NUd5*v^hicTu{q=f*5>p%KJJ1nWS#<9Hn{!^@^AV5p73p)^FWTzP7Z zzMu{-OIb{n7gr7rQDs-2D>Vy+HG3!LIO~ZNuzz}EzPQD&k_9{@K-E`6_V^&yF zQB`(bEri1#6)o5gRMgTi!!=EqFaCjyfM=(p1CoU(E z-9X`a$Gr+~&ldSVWcwnVuw+{AT(HnRSo_h(odIeYE*!7sixTdqgjz<|-3$j2dK2kH z6bf3R4pEbJRU2Uc&j zJ^IvX+xV2w8gutpOv@3M@(a1DEG#y>lJ5K2RiJ^S=qGcCU|nwkLp z9_RBSh7IjjSr#2HpdsN-vpgIrg`H0Cm@zJzfGKlPyNUBbrh?z~+IQRgelu}<2q-1m zT3-9ta1C$Z8>&+yFkd^}6CoGe>#2mMQe(#4W(9*JQ8dw?#DP1Gg?Rpif!Lzd2A5U2 z)cJvStRiJLbrHKCCIfx~VK2DzpsfSZXisY`-cN}+nIGlS?Z?;C zP8s_WP26C*q69vM$5Eskr`?mex_^XfKaFzG4tI_?u2$u?@MZ(I)8}Cloh*Gg7)-Um zAR+m$9(+xkiIW$t)<<$DPn;!LjyvpB*C)u~&r+l{Wb#qzHH|w=fE5tlTF2R8%pwJK znM)V3GmWzD{$`O%8B7!uk=cC&=G;ngDwL%w_S?-@mi^x!?dJldXFODc5+CI!*zj*3 z%9{HaFFG>8lxp96zf8%zRVmN$UEOZPCsG#lMqDj@XTY5ai~YV49McrN!f}u@@3?#B zZ!aw2a|de_0_r&YB)2t8#xrl}*ZrM7Bts$LDdO$YcFVUWq_B;55cpWxu>?qUJRyq7 
zlhC6Vls+TVMkHy*(+QYi6dhVUDb!Q&noOSAtiCpn=A~IspJVcb>Q`13R{mbwjp5tm zlu_+u0%YY;;{<-H zR4o(1RyDENxKZg*ZK|r1bw|@#ywq)?#31ArjoWh5+L@9Oe03$ACLfhu+xZRiH^VjZ z7*mn<7CI~KaByc0&MxIkKp&%m{t5m4X2*i%?*5j;DNi6X1wyA4iC-!(0K*`AcN=7w zLjgI35DtuMm7hL$FMzFAQSKWv&2;MORufOM%Vags9>e)fb$s`u&zg*N8>MD?9wSHj z6?!UzBr!&q{KUvL=T{TSf65zv{^Vo)cac#%th7FkF}vmYOsDsaVoS&q5eD{$0(K)#bFmA3J=9S&9=xOAb8;>F93@6j3X4zT z9=1UOH_*+JqkYa~%R+?t0{co(8PY88YfkL2=#G4h9W7)U{-pn1)l8q)qkSOWZc0$r zJh#KtlaT;9mC_#{M4d0)%l&m;?%5YZ2ew$)>`@;6`S05biYpAeyH>}1+jKdxXEVpT z^=4pznM*Rj_=X5*L(&!FP9P5Ri{+-H05zFBcR}qY{tO0y>Xw~bZ`kM*`R9JPY^@5_ zqzHn47jXJyR7wAL~yOK5@dL7o;|Ct-l+H`Ek9o z3aGSrJNc1~6A)3Fe)z-<^xT?sm}*u~fbLBENy(t=Q(u7oK_C7NDDf(`P?8OfQzDPwXIVg3c^m#I zFww^k>m4WTA94chFVPJ}L7`MtZxLU_3wt*%fBQ`x#EN4zXOhK|QCrpU z`_ click "Draft a new release" on the far right, fill in the tag name (if you didn't tag above, it will be made here), fill in a release name like "Version X.Y.Z", and copy-and-paste the markdown-formatted (!) changelog into the description (usually ``cat docs/changelog.rst | pandoc -f rst -t gfm``). Check "pre-release" if this is a beta/RC. - - CLI method: with ``gh`` installed, run ``gh release create vX.Y.Z -t "Version X.Y.Z"`` If this is a pre-release, add ``-p``. @@ -90,9 +91,7 @@ If you need to manually upload releases, you can download the releases from the .. code-block:: bash - python3 -m pip install build - python3 -m build - PYBIND11_SDIST_GLOBAL=1 python3 -m build + nox -s build twine upload dist/* This makes SDists and wheels, and the final line uploads them. 
diff --git a/ext/pybind11/docs/requirements.txt b/ext/pybind11/docs/requirements.txt index 8f293b5d34..d2a9ae1645 100644 --- a/ext/pybind11/docs/requirements.txt +++ b/ext/pybind11/docs/requirements.txt @@ -1,8 +1,6 @@ -breathe==4.26.1 -# docutils 0.17 breaks HTML tags & RTD theme -# https://github.com/sphinx-doc/sphinx/issues/9001 -docutils==0.16 -sphinx==3.3.1 -sphinx_rtd_theme==0.5.0 -sphinxcontrib-moderncmakedomain==3.17 -sphinxcontrib-svg2pdfconverter==1.1.0 +breathe==4.34.0 +furo==2022.6.21 +sphinx==5.0.2 +sphinx-copybutton==0.5.0 +sphinxcontrib-moderncmakedomain==3.21.4 +sphinxcontrib-svg2pdfconverter==1.2.0 diff --git a/ext/pybind11/docs/upgrade.rst b/ext/pybind11/docs/upgrade.rst index 69609ca284..6a9db2d08f 100644 --- a/ext/pybind11/docs/upgrade.rst +++ b/ext/pybind11/docs/upgrade.rst @@ -17,6 +17,10 @@ v2.9 converted to using ``py::module_::import("types").attr("SimpleNamespace")`` instead. +* The use of ``_`` in custom type casters can now be replaced with the more + readable ``const_name`` instead. The old ``_`` shortcut has been retained + unless it is being used as a macro (like for gettext). + .. _upgrade-guide-2.7: @@ -520,7 +524,7 @@ include a declaration of the form: PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr) -Continuing to do so won’t cause an error or even a deprecation warning, +Continuing to do so won't cause an error or even a deprecation warning, but it's completely redundant. 
diff --git a/ext/pybind11/include/pybind11/attr.h b/ext/pybind11/include/pybind11/attr.h index 0dedbc08dd..b5e3b7b22c 100644 --- a/ext/pybind11/include/pybind11/attr.h +++ b/ext/pybind11/include/pybind11/attr.h @@ -10,6 +10,7 @@ #pragma once +#include "detail/common.h" #include "cast.h" #include @@ -20,65 +21,72 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) /// @{ /// Annotation for methods -struct is_method { handle class_; +struct is_method { + handle class_; explicit is_method(const handle &c) : class_(c) {} }; /// Annotation for operators -struct is_operator { }; +struct is_operator {}; /// Annotation for classes that cannot be subclassed -struct is_final { }; +struct is_final {}; /// Annotation for parent scope -struct scope { handle value; +struct scope { + handle value; explicit scope(const handle &s) : value(s) {} }; /// Annotation for documentation -struct doc { const char *value; +struct doc { + const char *value; explicit doc(const char *value) : value(value) {} }; /// Annotation for function names -struct name { const char *value; +struct name { + const char *value; explicit name(const char *value) : value(value) {} }; /// Annotation indicating that a function is an overload associated with a given "sibling" -struct sibling { handle value; +struct sibling { + handle value; explicit sibling(const handle &value) : value(value.ptr()) {} }; /// Annotation indicating that a class derives from another given type -template struct base { +template +struct base { - PYBIND11_DEPRECATED("base() was deprecated in favor of specifying 'T' as a template argument to class_") - base() { } // NOLINT(modernize-use-equals-default): breaks MSVC 2015 when adding an attribute + PYBIND11_DEPRECATED( + "base() was deprecated in favor of specifying 'T' as a template argument to class_") + base() = default; }; /// Keep patient alive while nurse lives -template struct keep_alive { }; +template +struct keep_alive {}; /// Annotation indicating that a class is involved in a 
multiple inheritance relationship -struct multiple_inheritance { }; +struct multiple_inheritance {}; /// Annotation which enables dynamic attributes, i.e. adds `__dict__` to a class -struct dynamic_attr { }; +struct dynamic_attr {}; /// Annotation which enables the buffer protocol for a type -struct buffer_protocol { }; +struct buffer_protocol {}; /// Annotation which requests that a special metaclass is created for a type struct metaclass { handle value; PYBIND11_DEPRECATED("py::metaclass() is no longer required. It's turned on by default now.") - // NOLINTNEXTLINE(modernize-use-equals-default): breaks MSVC 2015 when adding an attribute - metaclass() {} + metaclass() = default; /// Override pybind11's default metaclass - explicit metaclass(handle value) : value(value) { } + explicit metaclass(handle value) : value(value) {} }; /// Specifies a custom callback with signature `void (PyHeapTypeObject*)` that @@ -99,15 +107,16 @@ struct custom_type_setup { }; /// Annotation that marks a class as local to the module: -struct module_local { const bool value; +struct module_local { + const bool value; constexpr explicit module_local(bool v = true) : value(v) {} }; /// Annotation to mark enums as an arithmetic type -struct arithmetic { }; +struct arithmetic {}; /// Mark a function for addition at the beginning of the existing overload chain instead of the end -struct prepend { }; +struct prepend {}; /** \rst A call policy which places one or more guard variables (``Ts...``) around the function call. 
@@ -127,9 +136,13 @@ struct prepend { }; return foo(args...); // forwarded arguments }); \endrst */ -template struct call_guard; +template +struct call_guard; -template <> struct call_guard<> { using type = detail::void_type; }; +template <> +struct call_guard<> { + using type = detail::void_type; +}; template struct call_guard { @@ -154,7 +167,8 @@ PYBIND11_NAMESPACE_BEGIN(detail) enum op_id : int; enum op_type : int; struct undefined_t; -template struct op_; +template +struct op_; void keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret); /// Internal data structure which holds metadata about a keyword argument @@ -166,15 +180,16 @@ struct argument_record { bool none : 1; ///< True if None is allowed when loading argument_record(const char *name, const char *descr, handle value, bool convert, bool none) - : name(name), descr(descr), value(value), convert(convert), none(none) { } + : name(name), descr(descr), value(value), convert(convert), none(none) {} }; -/// Internal data structure which holds metadata about a bound function (signature, overloads, etc.) +/// Internal data structure which holds metadata about a bound function (signature, overloads, +/// etc.) struct function_record { function_record() : is_constructor(false), is_new_style_constructor(false), is_stateless(false), - is_operator(false), is_method(false), has_args(false), - has_kwargs(false), has_kw_only_args(false), prepend(false) { } + is_operator(false), is_method(false), has_args(false), has_kwargs(false), + prepend(false) {} /// Function name char *name = nullptr; /* why no C++ strings? They generate heavier code.. 
*/ @@ -189,13 +204,13 @@ struct function_record { std::vector args; /// Pointer to lambda function which converts arguments and performs the actual call - handle (*impl) (function_call &) = nullptr; + handle (*impl)(function_call &) = nullptr; /// Storage for the wrapped function pointer and captured data, if any - void *data[3] = { }; + void *data[3] = {}; /// Pointer to custom destructor for 'data' (if needed) - void (*free_data) (function_record *ptr) = nullptr; + void (*free_data)(function_record *ptr) = nullptr; /// Return value policy associated with this function return_value_policy policy = return_value_policy::automatic; @@ -221,17 +236,15 @@ struct function_record { /// True if the function has a '**kwargs' argument bool has_kwargs : 1; - /// True once a 'py::kw_only' is encountered (any following args are keyword-only) - bool has_kw_only_args : 1; - /// True if this function is to be inserted at the beginning of the overload resolution chain bool prepend : 1; /// Number of arguments (including py::args and/or py::kwargs, if present) std::uint16_t nargs; - /// Number of trailing arguments (counted in `nargs`) that are keyword-only - std::uint16_t nargs_kw_only = 0; + /// Number of leading positional arguments, which are terminated by a py::args or py::kwargs + /// argument or by a py::kw_only annotation. + std::uint16_t nargs_pos = 0; /// Number of leading arguments (counted in `nargs`) that are positional-only std::uint16_t nargs_pos_only = 0; @@ -253,7 +266,7 @@ struct function_record { struct type_record { PYBIND11_NOINLINE type_record() : multiple_inheritance(false), dynamic_attr(false), buffer_protocol(false), - default_holder(true), module_local(false), is_final(false) { } + default_holder(true), module_local(false), is_final(false) {} /// Handle to the parent scope handle scope; @@ -312,42 +325,45 @@ struct type_record { /// Is the class inheritable from python classes? 
bool is_final : 1; - PYBIND11_NOINLINE void add_base(const std::type_info &base, void *(*caster)(void *)) { - auto base_info = detail::get_type_info(base, false); + PYBIND11_NOINLINE void add_base(const std::type_info &base, void *(*caster)(void *) ) { + auto *base_info = detail::get_type_info(base, false); if (!base_info) { std::string tname(base.name()); detail::clean_type_id(tname); - pybind11_fail("generic_type: type \"" + std::string(name) + - "\" referenced unknown base type \"" + tname + "\""); + pybind11_fail("generic_type: type \"" + std::string(name) + + "\" referenced unknown base type \"" + tname + "\""); } if (default_holder != base_info->default_holder) { std::string tname(base.name()); detail::clean_type_id(tname); - pybind11_fail("generic_type: type \"" + std::string(name) + "\" " + - (default_holder ? "does not have" : "has") + - " a non-default holder type while its base \"" + tname + "\" " + - (base_info->default_holder ? "does not" : "does")); + pybind11_fail("generic_type: type \"" + std::string(name) + "\" " + + (default_holder ? "does not have" : "has") + + " a non-default holder type while its base \"" + tname + "\" " + + (base_info->default_holder ? 
"does not" : "does")); } bases.append((PyObject *) base_info->type); - if (base_info->type->tp_dictoffset != 0) - dynamic_attr = true; +#if PY_VERSION_HEX < 0x030B0000 + dynamic_attr |= base_info->type->tp_dictoffset != 0; +#else + dynamic_attr |= (base_info->type->tp_flags & Py_TPFLAGS_MANAGED_DICT) != 0; +#endif - if (caster) + if (caster) { base_info->implicit_casts.emplace_back(type, caster); + } } }; -inline function_call::function_call(const function_record &f, handle p) : - func(f), parent(p) { +inline function_call::function_call(const function_record &f, handle p) : func(f), parent(p) { args.reserve(f.nargs); args_convert.reserve(f.nargs); } /// Tag for a new-style `__init__` defined in `detail/init.h` -struct is_new_style_constructor { }; +struct is_new_style_constructor {}; /** * Partial template specializations to process custom attributes provided to @@ -355,129 +371,177 @@ struct is_new_style_constructor { }; * fields in the type_record and function_record data structures or executed at * runtime to deal with custom call policies (e.g. keep_alive). 
*/ -template struct process_attribute; +template +struct process_attribute; -template struct process_attribute_default { +template +struct process_attribute_default { /// Default implementation: do nothing - static void init(const T &, function_record *) { } - static void init(const T &, type_record *) { } - static void precall(function_call &) { } - static void postcall(function_call &, handle) { } + static void init(const T &, function_record *) {} + static void init(const T &, type_record *) {} + static void precall(function_call &) {} + static void postcall(function_call &, handle) {} }; /// Process an attribute specifying the function's name -template <> struct process_attribute : process_attribute_default { +template <> +struct process_attribute : process_attribute_default { static void init(const name &n, function_record *r) { r->name = const_cast(n.value); } }; /// Process an attribute specifying the function's docstring -template <> struct process_attribute : process_attribute_default { +template <> +struct process_attribute : process_attribute_default { static void init(const doc &n, function_record *r) { r->doc = const_cast(n.value); } }; /// Process an attribute specifying the function's docstring (provided as a C-style string) -template <> struct process_attribute : process_attribute_default { +template <> +struct process_attribute : process_attribute_default { static void init(const char *d, function_record *r) { r->doc = const_cast(d); } - static void init(const char *d, type_record *r) { r->doc = const_cast(d); } + static void init(const char *d, type_record *r) { r->doc = d; } }; -template <> struct process_attribute : process_attribute { }; +template <> +struct process_attribute : process_attribute {}; /// Process an attribute indicating the function's return value policy -template <> struct process_attribute : process_attribute_default { +template <> +struct process_attribute : process_attribute_default { static void init(const 
return_value_policy &p, function_record *r) { r->policy = p; } }; -/// Process an attribute which indicates that this is an overloaded function associated with a given sibling -template <> struct process_attribute : process_attribute_default { +/// Process an attribute which indicates that this is an overloaded function associated with a +/// given sibling +template <> +struct process_attribute : process_attribute_default { static void init(const sibling &s, function_record *r) { r->sibling = s.value; } }; /// Process an attribute which indicates that this function is a method -template <> struct process_attribute : process_attribute_default { - static void init(const is_method &s, function_record *r) { r->is_method = true; r->scope = s.class_; } +template <> +struct process_attribute : process_attribute_default { + static void init(const is_method &s, function_record *r) { + r->is_method = true; + r->scope = s.class_; + } }; /// Process an attribute which indicates the parent scope of a method -template <> struct process_attribute : process_attribute_default { +template <> +struct process_attribute : process_attribute_default { static void init(const scope &s, function_record *r) { r->scope = s.value; } }; /// Process an attribute which indicates that this function is an operator -template <> struct process_attribute : process_attribute_default { +template <> +struct process_attribute : process_attribute_default { static void init(const is_operator &, function_record *r) { r->is_operator = true; } }; -template <> struct process_attribute : process_attribute_default { - static void init(const is_new_style_constructor &, function_record *r) { r->is_new_style_constructor = true; } +template <> +struct process_attribute + : process_attribute_default { + static void init(const is_new_style_constructor &, function_record *r) { + r->is_new_style_constructor = true; + } }; -inline void process_kw_only_arg(const arg &a, function_record *r) { - if (!a.name || a.name[0] == 
'\0') - pybind11_fail("arg(): cannot specify an unnamed argument after an kw_only() annotation"); - ++r->nargs_kw_only; +inline void check_kw_only_arg(const arg &a, function_record *r) { + if (r->args.size() > r->nargs_pos && (!a.name || a.name[0] == '\0')) { + pybind11_fail("arg(): cannot specify an unnamed argument after a kw_only() annotation or " + "args() argument"); + } +} + +inline void append_self_arg_if_needed(function_record *r) { + if (r->is_method && r->args.empty()) { + r->args.emplace_back("self", nullptr, handle(), /*convert=*/true, /*none=*/false); + } } /// Process a keyword argument attribute (*without* a default value) -template <> struct process_attribute : process_attribute_default { +template <> +struct process_attribute : process_attribute_default { static void init(const arg &a, function_record *r) { - if (r->is_method && r->args.empty()) - r->args.emplace_back("self", nullptr, handle(), true /*convert*/, false /*none not allowed*/); + append_self_arg_if_needed(r); r->args.emplace_back(a.name, nullptr, handle(), !a.flag_noconvert, a.flag_none); - if (r->has_kw_only_args) process_kw_only_arg(a, r); + check_kw_only_arg(a, r); } }; /// Process a keyword argument attribute (*with* a default value) -template <> struct process_attribute : process_attribute_default { +template <> +struct process_attribute : process_attribute_default { static void init(const arg_v &a, function_record *r) { - if (r->is_method && r->args.empty()) - r->args.emplace_back("self", nullptr /*descr*/, handle() /*parent*/, true /*convert*/, false /*none not allowed*/); + if (r->is_method && r->args.empty()) { + r->args.emplace_back( + "self", /*descr=*/nullptr, /*parent=*/handle(), /*convert=*/true, /*none=*/false); + } if (!a.value) { -#if !defined(NDEBUG) +#if defined(PYBIND11_DETAILED_ERROR_MESSAGES) std::string descr("'"); - if (a.name) descr += std::string(a.name) + ": "; + if (a.name) { + descr += std::string(a.name) + ": "; + } descr += a.type + "'"; if (r->is_method) 
{ - if (r->name) - descr += " in method '" + (std::string) str(r->scope) + "." + (std::string) r->name + "'"; - else + if (r->name) { + descr += " in method '" + (std::string) str(r->scope) + "." + + (std::string) r->name + "'"; + } else { descr += " in method of '" + (std::string) str(r->scope) + "'"; + } } else if (r->name) { descr += " in function '" + (std::string) r->name + "'"; } - pybind11_fail("arg(): could not convert default argument " - + descr + " into a Python object (type not registered yet?)"); + pybind11_fail("arg(): could not convert default argument " + descr + + " into a Python object (type not registered yet?)"); #else pybind11_fail("arg(): could not convert default argument " "into a Python object (type not registered yet?). " - "Compile in debug mode for more information."); + "#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for " + "more information."); #endif } r->args.emplace_back(a.name, a.descr, a.value.inc_ref(), !a.flag_noconvert, a.flag_none); - if (r->has_kw_only_args) process_kw_only_arg(a, r); + check_kw_only_arg(a, r); } }; /// Process a keyword-only-arguments-follow pseudo argument -template <> struct process_attribute : process_attribute_default { +template <> +struct process_attribute : process_attribute_default { static void init(const kw_only &, function_record *r) { - r->has_kw_only_args = true; + append_self_arg_if_needed(r); + if (r->has_args && r->nargs_pos != static_cast(r->args.size())) { + pybind11_fail("Mismatched args() and kw_only(): they must occur at the same relative " + "argument location (or omit kw_only() entirely)"); + } + r->nargs_pos = static_cast(r->args.size()); } }; /// Process a positional-only-argument maker -template <> struct process_attribute : process_attribute_default { +template <> +struct process_attribute : process_attribute_default { static void init(const pos_only &, function_record *r) { + append_self_arg_if_needed(r); r->nargs_pos_only = static_cast(r->args.size()); + if 
(r->nargs_pos_only > r->nargs_pos) { + pybind11_fail("pos_only(): cannot follow a py::args() argument"); + } + // It also can't follow a kw_only, but a static_assert in pybind11.h checks that } }; -/// Process a parent class attribute. Single inheritance only (class_ itself already guarantees that) +/// Process a parent class attribute. Single inheritance only (class_ itself already guarantees +/// that) template -struct process_attribute::value>> : process_attribute_default { +struct process_attribute::value>> + : process_attribute_default { static void init(const handle &h, type_record *r) { r->bases.append(h); } }; @@ -490,7 +554,9 @@ struct process_attribute> : process_attribute_default> { /// Process a multiple inheritance attribute template <> struct process_attribute : process_attribute_default { - static void init(const multiple_inheritance &, type_record *r) { r->multiple_inheritance = true; } + static void init(const multiple_inheritance &, type_record *r) { + r->multiple_inheritance = true; + } }; template <> @@ -536,34 +602,41 @@ template <> struct process_attribute : process_attribute_default {}; template -struct process_attribute> : process_attribute_default> { }; +struct process_attribute> : process_attribute_default> {}; /** * Process a keep_alive call policy -- invokes keep_alive_impl during the * pre-call handler if both Nurse, Patient != 0 and use the post-call handler * otherwise */ -template struct process_attribute> : public process_attribute_default> { +template +struct process_attribute> + : public process_attribute_default> { template = 0> - static void precall(function_call &call) { keep_alive_impl(Nurse, Patient, call, handle()); } + static void precall(function_call &call) { + keep_alive_impl(Nurse, Patient, call, handle()); + } template = 0> - static void postcall(function_call &, handle) { } + static void postcall(function_call &, handle) {} template = 0> - static void precall(function_call &) { } + static void precall(function_call &) 
{} template = 0> - static void postcall(function_call &call, handle ret) { keep_alive_impl(Nurse, Patient, call, ret); } + static void postcall(function_call &call, handle ret) { + keep_alive_impl(Nurse, Patient, call, ret); + } }; /// Recursively iterate over variadic template arguments -template struct process_attributes { - static void init(const Args&... args, function_record *r) { +template +struct process_attributes { + static void init(const Args &...args, function_record *r) { PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(r); PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(r); using expander = int[]; (void) expander{ 0, ((void) process_attribute::type>::init(args, r), 0)...}; } - static void init(const Args&... args, type_record *r) { + static void init(const Args &...args, type_record *r) { PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(r); PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(r); using expander = int[]; @@ -595,7 +668,7 @@ using extract_guard_t = typename exactly_one_t, Extr /// Check the number of named arguments at compile time template ::value...), - size_t self = constexpr_sum(std::is_same::value...)> + size_t self = constexpr_sum(std::is_same::value...)> constexpr bool expected_num_args(size_t nargs, bool has_args, bool has_kwargs) { PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(nargs, has_args, has_kwargs); return named == 0 || (self + named + size_t(has_args) + size_t(has_kwargs)) == nargs; diff --git a/ext/pybind11/include/pybind11/buffer_info.h b/ext/pybind11/include/pybind11/buffer_info.h index eba68d1aa1..06120d5563 100644 --- a/ext/pybind11/include/pybind11/buffer_info.h +++ b/ext/pybind11/include/pybind11/buffer_info.h @@ -19,9 +19,11 @@ PYBIND11_NAMESPACE_BEGIN(detail) inline std::vector c_strides(const std::vector &shape, ssize_t itemsize) { auto ndim = shape.size(); std::vector strides(ndim, itemsize); - if (ndim > 0) - for (size_t i = ndim - 1; i > 0; --i) + if (ndim > 0) { + for (size_t i = ndim - 1; i > 0; --i) { 
strides[i - 1] = strides[i] * shape[i]; + } + } return strides; } @@ -29,8 +31,9 @@ inline std::vector c_strides(const std::vector &shape, ssize_t inline std::vector f_strides(const std::vector &shape, ssize_t itemsize) { auto ndim = shape.size(); std::vector strides(ndim, itemsize); - for (size_t i = 1; i < ndim; ++i) + for (size_t i = 1; i < ndim; ++i) { strides[i] = strides[i - 1] * shape[i - 1]; + } return strides; } @@ -41,55 +44,85 @@ struct buffer_info { void *ptr = nullptr; // Pointer to the underlying storage ssize_t itemsize = 0; // Size of individual items in bytes ssize_t size = 0; // Total number of entries - std::string format; // For homogeneous buffers, this should be set to format_descriptor::format() + std::string format; // For homogeneous buffers, this should be set to + // format_descriptor::format() ssize_t ndim = 0; // Number of dimensions std::vector shape; // Shape of the tensor (1 entry per dimension) - std::vector strides; // Number of bytes between adjacent entries (for each per dimension) + std::vector strides; // Number of bytes between adjacent entries + // (for each per dimension) bool readonly = false; // flag to indicate if the underlying storage may be written to buffer_info() = default; - buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim, - detail::any_container shape_in, detail::any_container strides_in, bool readonly=false) - : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim), - shape(std::move(shape_in)), strides(std::move(strides_in)), readonly(readonly) { - if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size()) + buffer_info(void *ptr, + ssize_t itemsize, + const std::string &format, + ssize_t ndim, + detail::any_container shape_in, + detail::any_container strides_in, + bool readonly = false) + : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim), + shape(std::move(shape_in)), strides(std::move(strides_in)), readonly(readonly) { + if (ndim != 
(ssize_t) shape.size() || ndim != (ssize_t) strides.size()) { pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length"); - for (size_t i = 0; i < (size_t) ndim; ++i) + } + for (size_t i = 0; i < (size_t) ndim; ++i) { size *= shape[i]; + } } template - buffer_info(T *ptr, detail::any_container shape_in, detail::any_container strides_in, bool readonly=false) - : buffer_info(private_ctr_tag(), ptr, sizeof(T), format_descriptor::format(), static_cast(shape_in->size()), std::move(shape_in), std::move(strides_in), readonly) { } + buffer_info(T *ptr, + detail::any_container shape_in, + detail::any_container strides_in, + bool readonly = false) + : buffer_info(private_ctr_tag(), + ptr, + sizeof(T), + format_descriptor::format(), + static_cast(shape_in->size()), + std::move(shape_in), + std::move(strides_in), + readonly) {} - buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t size, bool readonly=false) - : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}, readonly) { } + buffer_info(void *ptr, + ssize_t itemsize, + const std::string &format, + ssize_t size, + bool readonly = false) + : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}, readonly) {} template - buffer_info(T *ptr, ssize_t size, bool readonly=false) - : buffer_info(ptr, sizeof(T), format_descriptor::format(), size, readonly) { } + buffer_info(T *ptr, ssize_t size, bool readonly = false) + : buffer_info(ptr, sizeof(T), format_descriptor::format(), size, readonly) {} template - buffer_info(const T *ptr, ssize_t size, bool readonly=true) - : buffer_info(const_cast(ptr), sizeof(T), format_descriptor::format(), size, readonly) { } + buffer_info(const T *ptr, ssize_t size, bool readonly = true) + : buffer_info( + const_cast(ptr), sizeof(T), format_descriptor::format(), size, readonly) {} explicit buffer_info(Py_buffer *view, bool ownview = true) - : buffer_info(view->buf, view->itemsize, view->format, view->ndim, + : buffer_info( + view->buf, + 
view->itemsize, + view->format, + view->ndim, {view->shape, view->shape + view->ndim}, /* Though buffer::request() requests PyBUF_STRIDES, ctypes objects * ignore this flag and return a view with NULL strides. * When strides are NULL, build them manually. */ view->strides - ? std::vector(view->strides, view->strides + view->ndim) - : detail::c_strides({view->shape, view->shape + view->ndim}, view->itemsize), + ? std::vector(view->strides, view->strides + view->ndim) + : detail::c_strides({view->shape, view->shape + view->ndim}, view->itemsize), (view->readonly != 0)) { + // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer) this->m_view = view; + // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer) this->ownview = ownview; } buffer_info(const buffer_info &) = delete; - buffer_info& operator=(const buffer_info &) = delete; + buffer_info &operator=(const buffer_info &) = delete; buffer_info(buffer_info &&other) noexcept { (*this) = std::move(other); } @@ -108,17 +141,28 @@ struct buffer_info { } ~buffer_info() { - if (m_view && ownview) { PyBuffer_Release(m_view); delete m_view; } + if (m_view && ownview) { + PyBuffer_Release(m_view); + delete m_view; + } } Py_buffer *view() const { return m_view; } Py_buffer *&view() { return m_view; } -private: - struct private_ctr_tag { }; - buffer_info(private_ctr_tag, void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim, - detail::any_container &&shape_in, detail::any_container &&strides_in, bool readonly) - : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly) { } +private: + struct private_ctr_tag {}; + + buffer_info(private_ctr_tag, + void *ptr, + ssize_t itemsize, + const std::string &format, + ssize_t ndim, + detail::any_container &&shape_in, + detail::any_container &&strides_in, + bool readonly) + : buffer_info( + ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly) {} Py_buffer *m_view = nullptr; bool ownview = 
false; @@ -126,17 +170,22 @@ private: PYBIND11_NAMESPACE_BEGIN(detail) -template struct compare_buffer_info { - static bool compare(const buffer_info& b) { +template +struct compare_buffer_info { + static bool compare(const buffer_info &b) { return b.format == format_descriptor::format() && b.itemsize == (ssize_t) sizeof(T); } }; -template struct compare_buffer_info::value>> { - static bool compare(const buffer_info& b) { - return (size_t) b.itemsize == sizeof(T) && (b.format == format_descriptor::value || - ((sizeof(T) == sizeof(long)) && b.format == (std::is_unsigned::value ? "L" : "l")) || - ((sizeof(T) == sizeof(size_t)) && b.format == (std::is_unsigned::value ? "N" : "n"))); +template +struct compare_buffer_info::value>> { + static bool compare(const buffer_info &b) { + return (size_t) b.itemsize == sizeof(T) + && (b.format == format_descriptor::value + || ((sizeof(T) == sizeof(long)) + && b.format == (std::is_unsigned::value ? "L" : "l")) + || ((sizeof(T) == sizeof(size_t)) + && b.format == (std::is_unsigned::value ? 
"N" : "n"))); } }; diff --git a/ext/pybind11/include/pybind11/cast.h b/ext/pybind11/include/pybind11/cast.h index 20fbb32587..3a40460276 100644 --- a/ext/pybind11/include/pybind11/cast.h +++ b/ext/pybind11/include/pybind11/cast.h @@ -10,11 +10,12 @@ #pragma once -#include "pytypes.h" #include "detail/common.h" #include "detail/descr.h" #include "detail/type_caster_base.h" #include "detail/typeid.h" +#include "pytypes.h" + #include #include #include @@ -27,61 +28,57 @@ #include #include -#if defined(PYBIND11_CPP17) -# if defined(__has_include) -# if __has_include() -# define PYBIND11_HAS_STRING_VIEW -# endif -# elif defined(_MSC_VER) -# define PYBIND11_HAS_STRING_VIEW -# endif -#endif -#ifdef PYBIND11_HAS_STRING_VIEW -#include -#endif - -#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L -# define PYBIND11_HAS_U8STRING -#endif - PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +PYBIND11_WARNING_DISABLE_MSVC(4127) + PYBIND11_NAMESPACE_BEGIN(detail) -template class type_caster : public type_caster_base { }; -template using make_caster = type_caster>; +template +class type_caster : public type_caster_base {}; +template +using make_caster = type_caster>; // Shortcut for calling a caster's `cast_op_type` cast operator for casting a type_caster to a T -template typename make_caster::template cast_op_type cast_op(make_caster &caster) { +template +typename make_caster::template cast_op_type cast_op(make_caster &caster) { return caster.operator typename make_caster::template cast_op_type(); } -template typename make_caster::template cast_op_type::type> +template +typename make_caster::template cast_op_type::type> cast_op(make_caster &&caster) { - return std::move(caster).operator - typename make_caster::template cast_op_type::type>(); + return std::move(caster).operator typename make_caster:: + template cast_op_type::type>(); } -template class type_caster> { +template +class type_caster> { private: using caster_t = make_caster; caster_t subcaster; - using 
reference_t = type&; - using subcaster_cast_op_type = - typename caster_t::template cast_op_type; + using reference_t = type &; + using subcaster_cast_op_type = typename caster_t::template cast_op_type; + + static_assert( + std::is_same::type &, subcaster_cast_op_type>::value + || std::is_same::value, + "std::reference_wrapper caster requires T to have a caster with an " + "`operator T &()` or `operator const T &()`"); - static_assert(std::is_same::type &, subcaster_cast_op_type>::value || - std::is_same::value, - "std::reference_wrapper caster requires T to have a caster with an " - "`operator T &()` or `operator const T &()`"); public: bool load(handle src, bool convert) { return subcaster.load(src, convert); } static constexpr auto name = caster_t::name; - static handle cast(const std::reference_wrapper &src, return_value_policy policy, handle parent) { + static handle + cast(const std::reference_wrapper &src, return_value_policy policy, handle parent) { // It is definitely wrong to take ownership of this pointer, so mask that rvp - if (policy == return_value_policy::take_ownership || policy == return_value_policy::automatic) + if (policy == return_value_policy::take_ownership + || policy == return_value_policy::automatic) { policy = return_value_policy::automatic_reference; + } return caster_t::cast(&src.get(), policy, parent); } - template using cast_op_type = std::reference_wrapper; + template + using cast_op_type = std::reference_wrapper; explicit operator std::reference_wrapper() { return cast_op(subcaster); } }; @@ -91,11 +88,16 @@ protected: \ public: \ static constexpr auto name = py_name; \ - template >::value, int> = 0> \ - static handle cast(T_ *src, return_value_policy policy, handle parent) { \ + template >::value, \ + int> \ + = 0> \ + static ::pybind11::handle cast( \ + T_ *src, ::pybind11::return_value_policy policy, ::pybind11::handle parent) { \ if (!src) \ - return none().release(); \ - if (policy == return_value_policy::take_ownership) { \ + 
return ::pybind11::none().release(); \ + if (policy == ::pybind11::return_value_policy::take_ownership) { \ auto h = cast(std::move(*src), policy, parent); \ delete src; \ return h; \ @@ -106,31 +108,33 @@ public: operator type &() { return value; } /* NOLINT(bugprone-macro-parentheses) */ \ operator type &&() && { return std::move(value); } /* NOLINT(bugprone-macro-parentheses) */ \ template \ - using cast_op_type = pybind11::detail::movable_cast_op_type + using cast_op_type = ::pybind11::detail::movable_cast_op_type -template using is_std_char_type = any_of< - std::is_same, /* std::string */ +template +using is_std_char_type = any_of, /* std::string */ #if defined(PYBIND11_HAS_U8STRING) - std::is_same, /* std::u8string */ + std::is_same, /* std::u8string */ #endif - std::is_same, /* std::u16string */ - std::is_same, /* std::u32string */ - std::is_same /* std::wstring */ ->; - + std::is_same, /* std::u16string */ + std::is_same, /* std::u32string */ + std::is_same /* std::wstring */ + >; template struct type_caster::value && !is_std_char_type::value>> { using _py_type_0 = conditional_t; - using _py_type_1 = conditional_t::value, _py_type_0, typename std::make_unsigned<_py_type_0>::type>; + using _py_type_1 = conditional_t::value, + _py_type_0, + typename std::make_unsigned<_py_type_0>::type>; using py_type = conditional_t::value, double, _py_type_1>; -public: +public: bool load(handle src, bool convert) { py_type py_value; - if (!src) + if (!src) { return false; + } #if !defined(PYPY_VERSION) auto index_check = [](PyObject *o) { return PyIndex_Check(o); }; @@ -141,25 +145,26 @@ public: #endif if (std::is_floating_point::value) { - if (convert || PyFloat_Check(src.ptr())) + if (convert || PyFloat_Check(src.ptr())) { py_value = (py_type) PyFloat_AsDouble(src.ptr()); - else + } else { return false; + } } else if (PyFloat_Check(src.ptr()) || (!convert && !PYBIND11_LONG_CHECK(src.ptr()) && !index_check(src.ptr()))) { return false; } else { handle src_or_index = src; 
-#if PY_VERSION_HEX < 0x03080000 + // PyPy: 7.3.7's 3.8 does not implement PyLong_*'s __index__ calls. +#if PY_VERSION_HEX < 0x03080000 || defined(PYPY_VERSION) object index; - if (!PYBIND11_LONG_CHECK(src.ptr())) { // So: index_check(src.ptr()) + if (!PYBIND11_LONG_CHECK(src.ptr())) { // So: index_check(src.ptr()) index = reinterpret_steal(PyNumber_Index(src.ptr())); if (!index) { PyErr_Clear(); if (!convert) return false; - } - else { + } else { src_or_index = index; } } @@ -168,8 +173,8 @@ public: py_value = as_unsigned(src_or_index.ptr()); } else { // signed integer: py_value = sizeof(T) <= sizeof(long) - ? (py_type) PyLong_AsLong(src_or_index.ptr()) - : (py_type) PYBIND11_LONG_AS_LONGLONG(src_or_index.ptr()); + ? (py_type) PyLong_AsLong(src_or_index.ptr()) + : (py_type) PYBIND11_LONG_AS_LONGLONG(src_or_index.ptr()); } } @@ -178,12 +183,14 @@ public: // Check to see if the conversion is valid (integers should match exactly) // Signed/unsigned checks happen elsewhere - if (py_err || (std::is_integral::value && sizeof(py_type) != sizeof(T) && py_value != (py_type) (T) py_value)) { + if (py_err + || (std::is_integral::value && sizeof(py_type) != sizeof(T) + && py_value != (py_type) (T) py_value)) { PyErr_Clear(); if (py_err && convert && (PyNumber_Check(src.ptr()) != 0)) { auto tmp = reinterpret_steal(std::is_floating_point::value - ? PyNumber_Float(src.ptr()) - : PyNumber_Long(src.ptr())); + ? 
PyNumber_Float(src.ptr()) + : PyNumber_Long(src.ptr())); PyErr_Clear(); return load(tmp, false); } @@ -194,55 +201,67 @@ public: return true; } - template + template static typename std::enable_if::value, handle>::type cast(U src, return_value_policy /* policy */, handle /* parent */) { return PyFloat_FromDouble((double) src); } - template - static typename std::enable_if::value && std::is_signed::value && (sizeof(U) <= sizeof(long)), handle>::type + template + static typename std::enable_if::value && std::is_signed::value + && (sizeof(U) <= sizeof(long)), + handle>::type cast(U src, return_value_policy /* policy */, handle /* parent */) { return PYBIND11_LONG_FROM_SIGNED((long) src); } - template - static typename std::enable_if::value && std::is_unsigned::value && (sizeof(U) <= sizeof(unsigned long)), handle>::type + template + static typename std::enable_if::value && std::is_unsigned::value + && (sizeof(U) <= sizeof(unsigned long)), + handle>::type cast(U src, return_value_policy /* policy */, handle /* parent */) { return PYBIND11_LONG_FROM_UNSIGNED((unsigned long) src); } - template - static typename std::enable_if::value && std::is_signed::value && (sizeof(U) > sizeof(long)), handle>::type + template + static typename std::enable_if::value && std::is_signed::value + && (sizeof(U) > sizeof(long)), + handle>::type cast(U src, return_value_policy /* policy */, handle /* parent */) { return PyLong_FromLongLong((long long) src); } - template - static typename std::enable_if::value && std::is_unsigned::value && (sizeof(U) > sizeof(unsigned long)), handle>::type + template + static typename std::enable_if::value && std::is_unsigned::value + && (sizeof(U) > sizeof(unsigned long)), + handle>::type cast(U src, return_value_policy /* policy */, handle /* parent */) { return PyLong_FromUnsignedLongLong((unsigned long long) src); } - PYBIND11_TYPE_CASTER(T, _::value>("int", "float")); + PYBIND11_TYPE_CASTER(T, const_name::value>("int", "float")); }; -template struct 
void_caster { +template +struct void_caster { public: bool load(handle src, bool) { - if (src && src.is_none()) + if (src && src.is_none()) { return true; + } return false; } static handle cast(T, return_value_policy /* policy */, handle /* parent */) { - return none().inc_ref(); + return none().release(); } - PYBIND11_TYPE_CASTER(T, _("None")); + PYBIND11_TYPE_CASTER(T, const_name("None")); }; -template <> class type_caster : public void_caster {}; +template <> +class type_caster : public void_caster {}; -template <> class type_caster : public type_caster { +template <> +class type_caster : public type_caster { public: using type_caster::cast; @@ -262,7 +281,7 @@ public: } /* Check if this is a C++ type */ - auto &bases = all_type_info((PyTypeObject *) type::handle_of(h).ptr()); + const auto &bases = all_type_info((PyTypeObject *) type::handle_of(h).ptr()); if (bases.size() == 1) { // Only allowing loading from a single-value type value = values_and_holders(reinterpret_cast(h.ptr())).begin()->value_ptr(); return true; @@ -273,24 +292,31 @@ public: } static handle cast(const void *ptr, return_value_policy /* policy */, handle /* parent */) { - if (ptr) + if (ptr) { return capsule(ptr).release(); - return none().inc_ref(); + } + return none().release(); } - template using cast_op_type = void*&; + template + using cast_op_type = void *&; explicit operator void *&() { return value; } - static constexpr auto name = _("capsule"); + static constexpr auto name = const_name("capsule"); + private: void *value = nullptr; }; -template <> class type_caster : public void_caster { }; +template <> +class type_caster : public void_caster {}; -template <> class type_caster { +template <> +class type_caster { public: bool load(handle src, bool convert) { - if (!src) return false; + if (!src) { + return false; + } if (src.ptr() == Py_True) { value = true; return true; @@ -304,22 +330,22 @@ public: Py_ssize_t res = -1; if (src.is_none()) { - res = 0; // None is implicitly converted to 
False + res = 0; // None is implicitly converted to False } - #if defined(PYPY_VERSION) - // On PyPy, check that "__bool__" (or "__nonzero__" on Python 2.7) attr exists +#if defined(PYPY_VERSION) + // On PyPy, check that "__bool__" attr exists else if (hasattr(src, PYBIND11_BOOL_ATTR)) { res = PyObject_IsTrue(src.ptr()); } - #else +#else // Alternate approach for CPython: this does the same as the above, but optimized // using the CPython API so as to avoid an unneeded attribute lookup. - else if (auto tp_as_number = src.ptr()->ob_type->tp_as_number) { + else if (auto *tp_as_number = src.ptr()->ob_type->tp_as_number) { if (PYBIND11_NB_BOOL(tp_as_number)) { res = (*PYBIND11_NB_BOOL(tp_as_number))(src.ptr()); } } - #endif +#endif if (res == 0 || res == 1) { value = (res != 0); return true; @@ -331,56 +357,43 @@ public: static handle cast(bool src, return_value_policy /* policy */, handle /* parent */) { return handle(src ? Py_True : Py_False).inc_ref(); } - PYBIND11_TYPE_CASTER(bool, _("bool")); + PYBIND11_TYPE_CASTER(bool, const_name("bool")); }; // Helper class for UTF-{8,16,32} C++ stl strings: -template struct string_caster { +template +struct string_caster { using CharT = typename StringType::value_type; // Simplify life by being able to assume standard char sizes (the standard only guarantees // minimums, but Python requires exact sizes) - static_assert(!std::is_same::value || sizeof(CharT) == 1, "Unsupported char size != 1"); + static_assert(!std::is_same::value || sizeof(CharT) == 1, + "Unsupported char size != 1"); #if defined(PYBIND11_HAS_U8STRING) - static_assert(!std::is_same::value || sizeof(CharT) == 1, "Unsupported char8_t size != 1"); + static_assert(!std::is_same::value || sizeof(CharT) == 1, + "Unsupported char8_t size != 1"); #endif - static_assert(!std::is_same::value || sizeof(CharT) == 2, "Unsupported char16_t size != 2"); - static_assert(!std::is_same::value || sizeof(CharT) == 4, "Unsupported char32_t size != 4"); + 
static_assert(!std::is_same::value || sizeof(CharT) == 2, + "Unsupported char16_t size != 2"); + static_assert(!std::is_same::value || sizeof(CharT) == 4, + "Unsupported char32_t size != 4"); // wchar_t can be either 16 bits (Windows) or 32 (everywhere else) static_assert(!std::is_same::value || sizeof(CharT) == 2 || sizeof(CharT) == 4, - "Unsupported wchar_t size != 2/4"); + "Unsupported wchar_t size != 2/4"); static constexpr size_t UTF_N = 8 * sizeof(CharT); bool load(handle src, bool) { -#if PY_MAJOR_VERSION < 3 - object temp; -#endif handle load_src = src; if (!src) { return false; } if (!PyUnicode_Check(load_src.ptr())) { -#if PY_MAJOR_VERSION >= 3 - return load_bytes(load_src); -#else - if (std::is_same::value) { - return load_bytes(load_src); - } - - // The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false - if (!PYBIND11_BYTES_CHECK(load_src.ptr())) - return false; - - temp = reinterpret_steal(PyUnicode_FromObject(load_src.ptr())); - if (!temp) { PyErr_Clear(); return false; } - load_src = temp; -#endif + return load_raw(load_src); } -#if PY_VERSION_HEX >= 0x03030000 - // On Python >= 3.3, for UTF-8 we avoid the need for a temporary `bytes` - // object by using `PyUnicode_AsUTF8AndSize`. - if (PYBIND11_SILENCE_MSVC_C4127(UTF_N == 8)) { + // For UTF-8 we avoid the need for a temporary `bytes` object by using + // `PyUnicode_AsUTF8AndSize`. + if (UTF_N == 8) { Py_ssize_t size = -1; const auto *buffer = reinterpret_cast(PyUnicode_AsUTF8AndSize(load_src.ptr(), &size)); @@ -391,98 +404,135 @@ template struct string_caster { value = StringType(buffer, static_cast(size)); return true; } -#endif - auto utfNbytes = reinterpret_steal(PyUnicode_AsEncodedString( - load_src.ptr(), UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr)); - if (!utfNbytes) { PyErr_Clear(); return false; } + auto utfNbytes + = reinterpret_steal(PyUnicode_AsEncodedString(load_src.ptr(), + UTF_N == 8 ? "utf-8" + : UTF_N == 16 ? 
"utf-16" + : "utf-32", + nullptr)); + if (!utfNbytes) { + PyErr_Clear(); + return false; + } - const auto *buffer = reinterpret_cast(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr())); + const auto *buffer + = reinterpret_cast(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr())); size_t length = (size_t) PYBIND11_BYTES_SIZE(utfNbytes.ptr()) / sizeof(CharT); // Skip BOM for UTF-16/32 - if (PYBIND11_SILENCE_MSVC_C4127(UTF_N > 8)) { + if (UTF_N > 8) { buffer++; length--; } value = StringType(buffer, length); // If we're loading a string_view we need to keep the encoded Python object alive: - if (IsView) + if (IsView) { loader_life_support::add_patient(utfNbytes); + } return true; } - static handle cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) { + static handle + cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) { const char *buffer = reinterpret_cast(src.data()); auto nbytes = ssize_t(src.size() * sizeof(CharT)); handle s = decode_utfN(buffer, nbytes); - if (!s) throw error_already_set(); + if (!s) { + throw error_already_set(); + } return s; } - PYBIND11_TYPE_CASTER(StringType, _(PYBIND11_STRING_NAME)); + PYBIND11_TYPE_CASTER(StringType, const_name(PYBIND11_STRING_NAME)); private: static handle decode_utfN(const char *buffer, ssize_t nbytes) { #if !defined(PYPY_VERSION) - return - UTF_N == 8 ? PyUnicode_DecodeUTF8(buffer, nbytes, nullptr) : - UTF_N == 16 ? PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr) : - PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr); + return UTF_N == 8 ? PyUnicode_DecodeUTF8(buffer, nbytes, nullptr) + : UTF_N == 16 ? 
PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr) + : PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr); #else - // PyPy segfaults when on PyUnicode_DecodeUTF16 (and possibly on PyUnicode_DecodeUTF32 as well), - // so bypass the whole thing by just passing the encoding as a string value, which works properly: - return PyUnicode_Decode(buffer, nbytes, UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr); + // PyPy segfaults when on PyUnicode_DecodeUTF16 (and possibly on PyUnicode_DecodeUTF32 as + // well), so bypass the whole thing by just passing the encoding as a string value, which + // works properly: + return PyUnicode_Decode(buffer, + nbytes, + UTF_N == 8 ? "utf-8" + : UTF_N == 16 ? "utf-16" + : "utf-32", + nullptr); #endif } - // When loading into a std::string or char*, accept a bytes object as-is (i.e. + // When loading into a std::string or char*, accept a bytes/bytearray object as-is (i.e. // without any encoding/decoding attempt). For other C++ char sizes this is a no-op. // which supports loading a unicode from a str, doesn't take this path. template - bool load_bytes(enable_if_t::value, handle> src) { + bool load_raw(enable_if_t::value, handle> src) { if (PYBIND11_BYTES_CHECK(src.ptr())) { - // We were passed a Python 3 raw bytes; accept it into a std::string or char* + // We were passed raw bytes; accept it into a std::string or char* // without any encoding attempt. const char *bytes = PYBIND11_BYTES_AS_STRING(src.ptr()); - if (bytes) { - value = StringType(bytes, (size_t) PYBIND11_BYTES_SIZE(src.ptr())); - return true; + if (!bytes) { + pybind11_fail("Unexpected PYBIND11_BYTES_AS_STRING() failure."); } + value = StringType(bytes, (size_t) PYBIND11_BYTES_SIZE(src.ptr())); + return true; + } + if (PyByteArray_Check(src.ptr())) { + // We were passed a bytearray; accept it into a std::string or char* + // without any encoding attempt. 
+ const char *bytearray = PyByteArray_AsString(src.ptr()); + if (!bytearray) { + pybind11_fail("Unexpected PyByteArray_AsString() failure."); + } + value = StringType(bytearray, (size_t) PyByteArray_Size(src.ptr())); + return true; } return false; } template - bool load_bytes(enable_if_t::value, handle>) { return false; } + bool load_raw(enable_if_t::value, handle>) { + return false; + } }; template -struct type_caster, enable_if_t::value>> +struct type_caster, + enable_if_t::value>> : string_caster> {}; #ifdef PYBIND11_HAS_STRING_VIEW template -struct type_caster, enable_if_t::value>> +struct type_caster, + enable_if_t::value>> : string_caster, true> {}; #endif // Type caster for C-style strings. We basically use a std::string type caster, but also add the // ability to use None as a nullptr char* (which the string caster doesn't allow). -template struct type_caster::value>> { +template +struct type_caster::value>> { using StringType = std::basic_string; - using StringCaster = type_caster; + using StringCaster = make_caster; StringCaster str_caster; bool none = false; CharT one_char = 0; + public: bool load(handle src, bool convert) { - if (!src) return false; + if (!src) { + return false; + } if (src.is_none()) { // Defer accepting None to other overloads (if we aren't in convert mode): - if (!convert) return false; + if (!convert) { + return false; + } none = true; return true; } @@ -490,14 +540,18 @@ public: } static handle cast(const CharT *src, return_value_policy policy, handle parent) { - if (src == nullptr) return pybind11::none().inc_ref(); + if (src == nullptr) { + return pybind11::none().release(); + } return StringCaster::cast(StringType(src), policy, parent); } static handle cast(CharT src, return_value_policy policy, handle parent) { if (std::is_same::value) { handle s = PyUnicode_DecodeLatin1((const char *) &src, 1, nullptr); - if (!s) throw error_already_set(); + if (!s) { + throw error_already_set(); + } return s; } return 
StringCaster::cast(StringType(1, src), policy, parent); @@ -507,20 +561,22 @@ public: return none ? nullptr : const_cast(static_cast(str_caster).c_str()); } explicit operator CharT &() { - if (none) + if (none) { throw value_error("Cannot convert None to a character"); + } auto &value = static_cast(str_caster); size_t str_len = value.size(); - if (str_len == 0) + if (str_len == 0) { throw value_error("Cannot convert empty string to a character"); + } // If we're in UTF-8 mode, we have two possible failures: one for a unicode character that - // is too high, and one for multiple unicode characters (caught later), so we need to figure - // out how long the first encoded character is in bytes to distinguish between these two - // errors. We also allow want to allow unicode characters U+0080 through U+00FF, as those - // can fit into a single char value. - if (PYBIND11_SILENCE_MSVC_C4127(StringCaster::UTF_N == 8) && str_len > 1 && str_len <= 4) { + // is too high, and one for multiple unicode characters (caught later), so we need to + // figure out how long the first encoded character is in bytes to distinguish between these + // two errors. We also allow want to allow unicode characters U+0080 through U+00FF, as + // those can fit into a single char value. 
+ if (StringCaster::UTF_N == 8 && str_len > 1 && str_len <= 4) { auto v0 = static_cast(value[0]); // low bits only: 0-127 // 0b110xxxxx - start of 2-byte sequence @@ -534,7 +590,8 @@ public: if (char0_bytes == str_len) { // If we have a 128-255 value, we can decode it into a single char: if (char0_bytes == 2 && (v0 & 0xFC) == 0xC0) { // 0x110000xx 0x10xxxxxx - one_char = static_cast(((v0 & 3) << 6) + (static_cast(value[1]) & 0x3F)); + one_char = static_cast(((v0 & 3) << 6) + + (static_cast(value[1]) & 0x3F)); return one_char; } // Otherwise we have a single character, but it's > U+00FF @@ -545,36 +602,42 @@ public: // UTF-16 is much easier: we can only have a surrogate pair for values above U+FFFF, thus a // surrogate pair with total length 2 instantly indicates a range error (but not a "your // string was too long" error). - else if (PYBIND11_SILENCE_MSVC_C4127(StringCaster::UTF_N == 16) && str_len == 2) { + else if (StringCaster::UTF_N == 16 && str_len == 2) { one_char = static_cast(value[0]); - if (one_char >= 0xD800 && one_char < 0xE000) + if (one_char >= 0xD800 && one_char < 0xE000) { throw value_error("Character code point not in range(0x10000)"); + } } - if (str_len != 1) + if (str_len != 1) { throw value_error("Expected a character, but multi-character string found"); + } one_char = value[0]; return one_char; } - static constexpr auto name = _(PYBIND11_STRING_NAME); - template using cast_op_type = pybind11::detail::cast_op_type<_T>; + static constexpr auto name = const_name(PYBIND11_STRING_NAME); + template + using cast_op_type = pybind11::detail::cast_op_type<_T>; }; // Base implementation for std::tuple and std::pair -template class Tuple, typename... Ts> class tuple_caster { +template