diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index fa7a35381d..44b7922b0a 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -295,6 +295,14 @@ parser.add_argument( help="Latency for scalar responses from ruby to the cu.", ) +parser.add_argument( + "--memtime-latency", + type=int, + # Set to a default of 41 from micro-benchmarks + default=41, + help="Latency for memtimes in scalar memory pipeline.", +) + parser.add_argument("--TLB-prefetch", type=int, help="prefetch depth for TLBs") parser.add_argument( "--pf-type", @@ -539,6 +547,7 @@ for i in range(n_cu): mem_resp_latency=args.mem_resp_latency, scalar_mem_req_latency=args.scalar_mem_req_latency, scalar_mem_resp_latency=args.scalar_mem_resp_latency, + memtime_latency=args.memtime_latency, localDataStore=LdsState( banks=args.numLdsBanks, bankConflictPenalty=args.ldsBankConflictPenalty, diff --git a/configs/example/gpufs/amd/AmdGPUOptions.py b/configs/example/gpufs/amd/AmdGPUOptions.py index 9996d33a2e..3cf8641e2e 100644 --- a/configs/example/gpufs/amd/AmdGPUOptions.py +++ b/configs/example/gpufs/amd/AmdGPUOptions.py @@ -253,3 +253,10 @@ def addAmdGPUOptions(parser): default=0, help="number of registers in cache", ) + parser.add_argument( + "--memtime-latency", + type=int, + # Set to a default of 41 from micro-benchmarks + default=41, + help="Latency for memtimes in scalar memory pipeline.", + ) diff --git a/configs/example/gpufs/system/amdgpu.py b/configs/example/gpufs/system/amdgpu.py index 55937cd255..dedbcc9324 100644 --- a/configs/example/gpufs/system/amdgpu.py +++ b/configs/example/gpufs/system/amdgpu.py @@ -75,6 +75,7 @@ def createGPU(system, args): execPolicy=args.CUExecPolicy, localMemBarrier=args.LocalMemBarrier, countPages=args.countPages, + memtime_latency=args.memtime_latency, localDataStore=LdsState( banks=args.numLdsBanks, bankConflictPenalty=args.ldsBankConflictPenalty, diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py index 
c10ccac647..62032045ff 100644 --- a/configs/ruby/GPU_VIPER.py +++ b/configs/ruby/GPU_VIPER.py @@ -254,7 +254,7 @@ class SQCCntrl(SQC_Controller, CntrlBase): self.L1cache.create(options) self.L1cache.resourceStalls = options.no_resource_stalls - self.sequencer = RubySequencer() + self.sequencer = VIPERSequencer() self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1cache diff --git a/src/arch/amdgpu/common/tlb.cc b/src/arch/amdgpu/common/tlb.cc index f1e2e5f89d..151fe7a83d 100644 --- a/src/arch/amdgpu/common/tlb.cc +++ b/src/arch/amdgpu/common/tlb.cc @@ -379,7 +379,11 @@ namespace X86ISA assert(seg != segment_idx::Ms); Addr vaddr = req->getVaddr(); - DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr); + if (req->hasNoAddr()) { + return true; + } else { + DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr); + } HandyM5Reg m5Reg = tc->readMiscRegNoEffect(misc_reg::M5Reg); if (m5Reg.prot) { @@ -693,13 +697,19 @@ namespace X86ISA if (success) { lookup_outcome = TLB_HIT; // Put the entry in SenderState - TlbEntry *entry = lookup(tmp_req->getVaddr(), false); - assert(entry); - auto p = sender_state->tc->getProcessPtr(); - sender_state->tlbEntry = - new TlbEntry(p->pid(), entry->vaddr, entry->paddr, - false, false); + if (pkt->req->hasNoAddr()) { + sender_state->tlbEntry = + new TlbEntry(p->pid(), 0, 0, + false, false); + } else { + TlbEntry *entry = lookup(tmp_req->getVaddr(), false); + assert(entry); + + sender_state->tlbEntry = + new TlbEntry(p->pid(), entry->vaddr, entry->paddr, + false, false); + } if (update_stats) { // the reqCnt has an entry per level, so its size tells us diff --git a/src/arch/amdgpu/vega/gpu_mem_helpers.hh b/src/arch/amdgpu/vega/gpu_mem_helpers.hh index 73476c84e5..3680ff7977 100644 --- a/src/arch/amdgpu/vega/gpu_mem_helpers.hh +++ b/src/arch/amdgpu/vega/gpu_mem_helpers.hh @@ -204,11 +204,17 @@ initMemReqScalarHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type) * than the address of the first byte then we have a 
misaligned * access. */ - bool misaligned_acc = split_addr > vaddr; + bool misaligned_acc = split_addr > vaddr && + !gpuDynInst->staticInstruction()->hasNoAddr(); - RequestPtr req = std::make_shared(vaddr, req_size, 0, - gpuDynInst->computeUnit()->requestorId(), 0, - gpuDynInst->wfDynId); + Request::Flags flags; + if (gpuDynInst->staticInstruction()->hasNoAddr()) { + flags.set(Request::HAS_NO_ADDR); + } + RequestPtr req = std::make_shared( + vaddr, req_size, std::move(flags), + gpuDynInst->computeUnit()->requestorId(), 0, + gpuDynInst->wfDynId); if (misaligned_acc) { RequestPtr req1, req2; diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index a979c1e492..4fe3f71783 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -6107,6 +6107,8 @@ namespace VegaISA } } // getOperandSize + void initiateAcc(GPUDynInstPtr gpuDynInst) override; + void completeAcc(GPUDynInstPtr gpuDynInst) override; void execute(GPUDynInstPtr) override; }; // Inst_SMEM__S_MEMTIME diff --git a/src/arch/amdgpu/vega/insts/smem.cc b/src/arch/amdgpu/vega/insts/smem.cc index a6af4f007d..3059b83ef6 100644 --- a/src/arch/amdgpu/vega/insts/smem.cc +++ b/src/arch/amdgpu/vega/insts/smem.cc @@ -937,8 +937,9 @@ namespace VegaISA Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt) : Inst_SMEM(iFmt, "s_memtime") { - // s_memtime does not issue a memory request - setFlag(ALU); + setFlag(NoAddr); + setFlag(MemoryRef); + setFlag(Load); } // Inst_SMEM__S_MEMTIME Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME() @@ -950,10 +951,26 @@ namespace VegaISA void Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst) { - ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); - sdst = (ScalarRegU64)gpuDynInst->computeUnit()->curCycle(); - sdst.write(); + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + 
gpuDynInst->latency.set(gpuDynInst->computeUnit()->memtime_latency); + gpuDynInst->scalarAddr = 0; + gpuDynInst->computeUnit()->scalarMemoryPipe.issueRequest(gpuDynInst); } // execute + + void Inst_SMEM__S_MEMTIME::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<2>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_MEMTIME::completeAcc(GPUDynInstPtr gpuDynInst) + { + // use U64 because 2 requests, each size 32 + ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); + sdst.write(); + } // completeAcc // --- Inst_SMEM__S_MEMREALTIME class methods --- Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt) diff --git a/src/arch/amdgpu/vega/tlb.cc b/src/arch/amdgpu/vega/tlb.cc index c3dd576f0e..b040f07742 100644 --- a/src/arch/amdgpu/vega/tlb.cc +++ b/src/arch/amdgpu/vega/tlb.cc @@ -259,6 +259,9 @@ GpuTLB::demapPage(Addr va, uint64_t asn) VegaTlbEntry * GpuTLB::tlbLookup(const RequestPtr &req, bool update_stats) { + if (req->hasNoAddr()) { + return NULL; + } Addr vaddr = req->getVaddr(); Addr alignedVaddr = pageAlign(vaddr); DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr); @@ -342,20 +345,27 @@ GpuTLB::issueTLBLookup(PacketPtr pkt) // Access the TLB and figure out if it's a hit or a miss. 
auto entry = tlbLookup(tmp_req, update_stats); - - if (entry) { - lookup_outcome = TLB_HIT; + if (entry || pkt->req->hasNoAddr()) { // Put the entry in SenderState - VegaTlbEntry *entry = lookup(virt_page_addr, false); - assert(entry); + lookup_outcome = TLB_HIT; + if (pkt->req->hasNoAddr()) { + sender_state->tlbEntry = + new VegaTlbEntry(1 /* VMID */, 0, 0, 0, 0); + // set false because we shouldn't go to + // host memory for a memtime request + pkt->req->setSystemReq(false); + } else { + VegaTlbEntry *entry = lookup(virt_page_addr, false); + assert(entry); - // Set if this is a system request - pkt->req->setSystemReq(entry->pte.s); + // Set if this is a system request + pkt->req->setSystemReq(entry->pte.s); - Addr alignedPaddr = pageAlign(entry->paddr); - sender_state->tlbEntry = - new VegaTlbEntry(1 /* VMID */, virt_page_addr, alignedPaddr, - entry->logBytes, entry->pte); + Addr alignedPaddr = pageAlign(entry->paddr); + sender_state->tlbEntry = + new VegaTlbEntry(1 /* VMID */, virt_page_addr, alignedPaddr, + entry->logBytes, entry->pte); + } if (update_stats) { // the reqCnt has an entry per level, so its size tells us diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py index 8cb40f1c87..e7b88bee38 100644 --- a/src/gpu-compute/GPU.py +++ b/src/gpu-compute/GPU.py @@ -204,6 +204,11 @@ class ComputeUnit(ClockedObject): "TCP and cu as well as TCP data array " "access. Specified in GPU clock cycles", ) + memtime_latency = Param.Int( + 41, + "Latency for memtimes in scalar memory pipeline. 
" + "Specified in GPU clock cycles", + ) system = Param.System(Parent.any, "system object") cu_id = Param.Int("CU id") vrf_to_coalescer_bus_width = Param.Int( diff --git a/src/gpu-compute/GPUStaticInstFlags.py b/src/gpu-compute/GPUStaticInstFlags.py index 2dd7bbeabb..e31072bdda 100644 --- a/src/gpu-compute/GPUStaticInstFlags.py +++ b/src/gpu-compute/GPUStaticInstFlags.py @@ -108,4 +108,5 @@ class GPUStaticInstFlags(Enum): "MAC", # MAC "MAD", # MAD "MFMA", # MFMA + "NoAddr", # Request has no address but goes to SQC for timing ] diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index 598864f9f2..a2aa2a9cc8 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -105,6 +105,7 @@ ComputeUnit::ComputeUnit(const Params &p) : ClockedObject(p), p.scalar_mem_req_latency * p.clk_domain->clockPeriod()), scalar_resp_tick_latency( p.scalar_mem_resp_latency * p.clk_domain->clockPeriod()), + memtime_latency(p.memtime_latency * p.clk_domain->clockPeriod()), _requestorId(p.system->getRequestorId(this, "ComputeUnit")), lds(*p.localDataStore), gmTokenPort(name() + ".gmTokenPort", this), ldsPort(csprintf("%s-port", name()), this), diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh index 568a81d2c5..5af7edd0cc 100644 --- a/src/gpu-compute/compute_unit.hh +++ b/src/gpu-compute/compute_unit.hh @@ -362,6 +362,8 @@ class ComputeUnit : public ClockedObject Tick scalar_req_tick_latency; Tick scalar_resp_tick_latency; + Tick memtime_latency; + /** * Number of WFs to schedule to each SIMD. 
This vector is populated * by hasDispResources(), and consumed by the subsequent call to diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh index f8b6394d6f..316fef8b2e 100644 --- a/src/gpu-compute/gpu_static_inst.hh +++ b/src/gpu-compute/gpu_static_inst.hh @@ -222,6 +222,8 @@ class GPUStaticInst : public GPUStaticInstFlags bool isMAD() const { return _flags[MAD]; } bool isMFMA() const { return _flags[MFMA]; } + bool hasNoAddr() const { return _flags[NoAddr]; } + virtual int instSize() const = 0; // only used for memory instructions diff --git a/src/mem/request.hh b/src/mem/request.hh index 80bd4c817a..c3845caa91 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -255,7 +255,10 @@ class Request : public Extensible * These flags are *not* cleared when a Request object is * reused (assigned a new address). */ - STICKY_FLAGS = INST_FETCH + STICKY_FLAGS = INST_FETCH, + /** TLBI_EXT_SYNC_COMP seems to be the largest value + of FlagsType, so HAS_NO_ADDR's value is that << 1 */ + HAS_NO_ADDR = 0x0001000000000000, }; static const FlagsType STORE_NO_DATA = CACHE_BLOCK_ZERO | CLEAN | INVALIDATE; @@ -1013,6 +1016,7 @@ class Request : public Extensible bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); } bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); } bool isInstFetch() const { return _flags.isSet(INST_FETCH); } + bool hasNoAddr() const { return _flags.isSet(HAS_NO_ADDR); } bool isPrefetch() const { diff --git a/src/mem/ruby/SConscript b/src/mem/ruby/SConscript index ae2e20c4a3..946213ab1a 100644 --- a/src/mem/ruby/SConscript +++ b/src/mem/ruby/SConscript @@ -129,3 +129,4 @@ MakeInclude('system/Sequencer.hh') MakeInclude('system/GPUCoalescer.hh') MakeInclude('system/HTMSequencer.hh') MakeInclude('system/VIPERCoalescer.hh') +MakeInclude('system/VIPERSequencer.hh') diff --git a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm index 67c7753f09..d47e13c995 100644 --- 
a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm @@ -243,12 +243,17 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)") if (mandatoryQueue_in.isReady(clockEdge())) { peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { Entry cache_entry := getCacheEntry(in_msg.LineAddress); - TBE tbe := TBEs.lookup(in_msg.LineAddress); - DPRINTF(RubySlicc, "%s\n", in_msg); - if (in_msg.Type == RubyRequestType:REPLACEMENT) { - trigger(Event:Evict, in_msg.LineAddress, cache_entry, tbe); + if (in_msg.Type == RubyRequestType:hasNoAddr) { + sequencer.readCallback(in_msg.LineAddress, cache_entry.DataBlk, true, MachineType:L1Cache); + mandatoryQueue_in.dequeue(clockEdge()); } else { - trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe); + TBE tbe := TBEs.lookup(in_msg.LineAddress); + DPRINTF(RubySlicc, "%s\n", in_msg); + if (in_msg.Type == RubyRequestType:REPLACEMENT) { + trigger(Event:Evict, in_msg.LineAddress, cache_entry, tbe); + } else { + trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe); + } } } } diff --git a/src/mem/ruby/protocol/RubySlicc_Exports.sm b/src/mem/ruby/protocol/RubySlicc_Exports.sm index 5a7324cb72..8f0341f328 100644 --- a/src/mem/ruby/protocol/RubySlicc_Exports.sm +++ b/src/mem/ruby/protocol/RubySlicc_Exports.sm @@ -193,6 +193,7 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") { TLBI_SYNC, desc="TLB Invalidation Sync operation - Potential initiation"; TLBI_EXT_SYNC, desc="TLB Invalidation Sync operation - External Sync has been requested"; TLBI_EXT_SYNC_COMP, desc="TLB Invalidation Sync operation - External Sync has been completed"; + hasNoAddr, desc="Request for timing purposes in VIPERSequencer hitCallback and processReadCallback but reads no address"; } bool isWriteRequest(RubyRequestType type); diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index ae21dc95ad..2630a6a27c 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ 
b/src/mem/ruby/system/RubyPort.cc @@ -268,7 +268,7 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt) } // Check for pio requests and directly send them to the dedicated // pio port. - if (pkt->cmd != MemCmd::MemSyncReq) { + if (pkt->cmd != MemCmd::MemSyncReq && !pkt->req->hasNoAddr()) { if (!pkt->req->isMemMgmt() && !isPhysMemAddress(pkt)) { assert(owner.memRequestPort.isConnected()); DPRINTF(RubyPort, "Request address %#x assumed to be a " @@ -456,7 +456,9 @@ RubyPort::ruby_hit_callback(PacketPtr pkt) // The packet was destined for memory and has not yet been turned // into a response - assert(system->isMemAddr(pkt->getAddr()) || system->isDeviceMemAddr(pkt)); + assert(system->isMemAddr(pkt->getAddr()) || + system->isDeviceMemAddr(pkt) || + pkt->req->hasNoAddr()); assert(pkt->isRequest()); // First we must retrieve the request port from the sender State @@ -613,7 +615,7 @@ RubyPort::MemResponsePort::hitCallback(PacketPtr pkt) // Flush, acquire, release requests don't access physical memory if (pkt->isFlush() || pkt->cmd == MemCmd::MemSyncReq - || pkt->cmd == MemCmd::WriteCompleteResp) { + || pkt->cmd == MemCmd::WriteCompleteResp || pkt->req->hasNoAddr()) { accessPhysMem = false; } diff --git a/src/mem/ruby/system/SConscript b/src/mem/ruby/system/SConscript index 77bce7f851..9aa3f78da8 100644 --- a/src/mem/ruby/system/SConscript +++ b/src/mem/ruby/system/SConscript @@ -56,6 +56,7 @@ SimObject('Sequencer.py', sim_objects=[ 'DMASequencer']) if env['CONF']['BUILD_GPU']: SimObject('VIPERCoalescer.py', sim_objects=['VIPERCoalescer']) + SimObject('VIPERSequencer.py', sim_objects=['VIPERSequencer']) Source('CacheRecorder.cc') Source('DMASequencer.cc') @@ -68,3 +69,4 @@ Source('RubySystem.cc') Source('Sequencer.cc') if env['CONF']['BUILD_GPU']: Source('VIPERCoalescer.cc') + Source('VIPERSequencer.cc') diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index bb21913879..4b0c6a239c 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ 
b/src/mem/ruby/system/Sequencer.cc @@ -562,6 +562,33 @@ Sequencer::writeCallback(Addr address, DataBlock& data, } } +bool +Sequencer::processReadCallback(SequencerRequest &seq_req, + DataBlock& data, + const bool ruby_request, + bool externalHit, + const MachineType mach, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime) +{ + if (ruby_request) { + assert((seq_req.m_type == RubyRequestType_LD) || + (seq_req.m_type == RubyRequestType_Load_Linked) || + (seq_req.m_type == RubyRequestType_IFETCH)); + } + if ((seq_req.m_type != RubyRequestType_LD) && + (seq_req.m_type != RubyRequestType_Load_Linked) && + (seq_req.m_type != RubyRequestType_IFETCH) && + (seq_req.m_type != RubyRequestType_REPLACEMENT)) { + // Write request: reissue request to the cache hierarchy + issueRequest(seq_req.pkt, seq_req.m_second_type); + return true; + } + return false; + +} + void Sequencer::readCallback(Addr address, DataBlock& data, bool externalHit, const MachineType mach, @@ -583,17 +610,9 @@ Sequencer::readCallback(Addr address, DataBlock& data, bool ruby_request = true; while (!seq_req_list.empty()) { SequencerRequest &seq_req = seq_req_list.front(); - if (ruby_request) { - assert((seq_req.m_type == RubyRequestType_LD) || - (seq_req.m_type == RubyRequestType_Load_Linked) || - (seq_req.m_type == RubyRequestType_IFETCH)); - } - if ((seq_req.m_type != RubyRequestType_LD) && - (seq_req.m_type != RubyRequestType_Load_Linked) && - (seq_req.m_type != RubyRequestType_IFETCH) && - (seq_req.m_type != RubyRequestType_REPLACEMENT)) { - // Write request: reissue request to the cache hierarchy - issueRequest(seq_req.pkt, seq_req.m_second_type); + if (processReadCallback(seq_req, data, ruby_request, externalHit, mach, + initialRequestTime, forwardRequestTime, + firstResponseTime)) { break; } if (ruby_request) { @@ -983,6 +1002,8 @@ Sequencer::makeRequest(PacketPtr pkt) } #endif + } else if (pkt->req->hasNoAddr()) { + primary_type = secondary_type = 
RubyRequestType_hasNoAddr; } else { // // To support SwapReq, we need to check isWrite() first: a SwapReq diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index 3dc61ab4fa..1f60d2638f 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -45,6 +45,7 @@ #include #include +#include "cpu/testers/rubytest/RubyTester.hh" #include "mem/ruby/common/Address.hh" #include "mem/ruby/protocol/MachineType.hh" #include "mem/ruby/protocol/RubyRequestType.hh" @@ -210,16 +211,24 @@ class Sequencer : public RubyPort statistics::Counter getIncompleteTimes(const MachineType t) const { return m_IncompleteTimes[t]; } - private: + protected: void issueRequest(PacketPtr pkt, RubyRequestType type); + virtual void hitCallback(SequencerRequest* srequest, DataBlock& data, + bool llscSuccess, + const MachineType mach, const bool externalHit, + const Cycles initialRequestTime, + const Cycles forwardRequestTime, + const Cycles firstResponseTime, + const bool was_coalesced); - void hitCallback(SequencerRequest* srequest, DataBlock& data, - bool llscSuccess, - const MachineType mach, const bool externalHit, - const Cycles initialRequestTime, - const Cycles forwardRequestTime, - const Cycles firstResponseTime, - const bool was_coalesced); + virtual bool processReadCallback(SequencerRequest &seq_req, + DataBlock& data, + const bool rubyRequest, + bool externalHit, + const MachineType mach, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime); void recordMissLatency(SequencerRequest* srequest, bool llscSuccess, const MachineType respondingMach, @@ -227,6 +236,7 @@ class Sequencer : public RubyPort Cycles forwardRequestTime, Cycles firstResponseTime); + private: // Private copy constructor and assignment operator Sequencer(const Sequencer& obj); Sequencer& operator=(const Sequencer& obj); diff --git a/src/mem/ruby/system/VIPERSequencer.cc b/src/mem/ruby/system/VIPERSequencer.cc new file mode 100644 index 
0000000000..ac840777d4 --- /dev/null +++ b/src/mem/ruby/system/VIPERSequencer.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2024 The University of Wisconsin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "mem/ruby/system/VIPERSequencer.hh" + +#include "debug/RubyHitMiss.hh" +#include "debug/RubySequencer.hh" +#include "gpu-compute/gpu_dyn_inst.hh" +#include "mem/ruby/system/Sequencer.hh" +#include "params/VIPERSequencer.hh" + +namespace gem5 +{ + +namespace ruby +{ + +VIPERSequencer::VIPERSequencer(const Params &p) + : Sequencer(p) +{ +} + + +VIPERSequencer::~VIPERSequencer() +{ +} + +void +VIPERSequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, + bool llscSuccess, + const MachineType mach, const bool externalHit, + const Cycles initialRequestTime, + const Cycles forwardRequestTime, + const Cycles firstResponseTime, + const bool was_coalesced) +{ + if (srequest->m_type != RubyRequestType_hasNoAddr) { + return Sequencer::hitCallback( + srequest, data, llscSuccess, mach, externalHit, initialRequestTime, + forwardRequestTime, firstResponseTime, was_coalesced); + } + + PacketPtr pkt = srequest->pkt; + + assert(!was_coalesced); + DPRINTF(RubySequencer, "Setting hasNoAddr ticks\n"); + Cycles curCycle = + pkt->findNextSenderState + () + ->_gpuDynInst->computeUnit()->curCycle(); + pkt->setData((const uint8_t *)&curCycle); + + // If using the RubyTester, update the RubyTester sender state's + // subBlock with the received data. The tester will later access + // this state.
+ assert(!m_usingRubyTester); + assert(!RubySystem::getWarmupEnabled()); + assert(!RubySystem::getCooldownEnabled()); + ruby_hit_callback(pkt); + testDrainComplete(); +} + +bool +VIPERSequencer::processReadCallback(SequencerRequest &seq_req, + DataBlock& data, + const bool ruby_request, + bool externalHit, + const MachineType mach, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime) +{ + if (seq_req.m_type != RubyRequestType_hasNoAddr) { + return Sequencer::processReadCallback( + seq_req, data, ruby_request, externalHit, mach, initialRequestTime, + forwardRequestTime, firstResponseTime); + } + return false; +} + +} // namespace ruby +} // namespace gem5 diff --git a/src/mem/ruby/system/VIPERSequencer.hh b/src/mem/ruby/system/VIPERSequencer.hh new file mode 100644 index 0000000000..0447d64fc8 --- /dev/null +++ b/src/mem/ruby/system/VIPERSequencer.hh @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2024 The University of Wisconsin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __MEM_RUBY_SYSTEM_VIPERSEQUENCER_HH__ +#define __MEM_RUBY_SYSTEM_VIPERSEQUENCER_HH__ + +#include + +#include "mem/ruby/system/Sequencer.hh" + +namespace gem5 +{ + +struct VIPERSequencerParams; + +namespace ruby +{ + +class VIPERSequencer : public Sequencer +{ + public: + typedef VIPERSequencerParams Params; + VIPERSequencer(const Params &p); + ~VIPERSequencer(); + + private: + void hitCallback(SequencerRequest* srequest, DataBlock& data, + bool llscSuccess, + const MachineType mach, const bool externalHit, + const Cycles initialRequestTime, + const Cycles forwardRequestTime, + const Cycles firstResponseTime, + const bool was_coalesced); + + bool processReadCallback(SequencerRequest &seq_req, + DataBlock& data, + const bool rubyRequest, + bool externalHit, + const MachineType mach, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime); + +}; + +} // namespace ruby +} // namespace gem5 + +#endif //__MEM_RUBY_SYSTEM_VIPERSEQUENCER_HH__ diff --git a/src/mem/ruby/system/VIPERSequencer.py b/src/mem/ruby/system/VIPERSequencer.py new file mode 100644 index 0000000000..9a3b389e96 --- /dev/null +++ b/src/mem/ruby/system/VIPERSequencer.py @@ -0,0 +1,37 @@ +# Copyright (c) 2024 The University of Wisconsin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. 
Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +from m5.objects.Sequencer import * +from m5.params import * +from m5.proxy import * + + +class VIPERSequencer(RubySequencer): + type = "VIPERSequencer" + cxx_class = "gem5::ruby::VIPERSequencer" + cxx_header = "mem/ruby/system/VIPERSequencer.hh"