gpu-compute: add separate scalar memory request/response latencies

Adds --scalar-mem-req-latency / --scalar-mem-resp-latency to apu_se.py,
forwards them to each ComputeUnit, declares the matching SimObject
parameters, and converts them to ticks in the ComputeUnit constructor.
The scalar DTLB response handler now schedules the scalar memory request
with scalar_req_tick_latency instead of the vector req_tick_latency.

Notes:
  * scalar_resp_tick_latency is initialised but not read by any hunk in
    this patch; its default is 0 in both apu_se.py and GPU.py so every
    config script sees the same value.
  * The scalar parameter descriptions refer to the scalar cache rather
    than the TCP, which the vector path descriptions describe.

diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py
index b33daa5b39..39def024fc 100644
--- a/configs/example/apu_se.py
+++ b/configs/example/apu_se.py
@@ -275,6 +275,21 @@ parser.add_argument(
     default=50,
     help="Latency for responses from ruby to the cu.",
 )
+parser.add_argument(
+    "--scalar-mem-req-latency",
+    type=int,
+    default=50,
+    help="Latency for scalar requests from the cu to ruby.",
+)
+parser.add_argument(
+    "--scalar-mem-resp-latency",
+    type=int,
+    # Set to 0 as the scalar cache response path does not model
+    # response latency yet and this parameter is currently not used
+    default=0,
+    help="Latency for scalar responses from ruby to the cu.",
+)
+
 parser.add_argument(
     "--TLB-prefetch", type=int, help="prefetch depth for" "TLBs"
 )
@@ -463,6 +478,8 @@ for i in range(n_cu):
             vrf_lm_bus_latency=args.vrf_lm_bus_latency,
             mem_req_latency=args.mem_req_latency,
             mem_resp_latency=args.mem_resp_latency,
+            scalar_mem_req_latency=args.scalar_mem_req_latency,
+            scalar_mem_resp_latency=args.scalar_mem_resp_latency,
             localDataStore=LdsState(
                 banks=args.numLdsBanks,
                 bankConflictPenalty=args.ldsBankConflictPenalty,
diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py
index 517d1801c0..0fdc0b75a7 100644
--- a/src/gpu-compute/GPU.py
+++ b/src/gpu-compute/GPU.py
@@ -178,6 +178,19 @@ class ComputeUnit(ClockedObject):
         "TCP and cu as well as TCP data array "
         "access. Specified in GPU clock cycles",
     )
+    scalar_mem_req_latency = Param.Int(
+        50,
+        "Latency for scalar requests from the cu to ruby. "
+        "Represents the pipeline to reach the scalar cache "
+        "and specified in GPU clock cycles",
+    )
+    scalar_mem_resp_latency = Param.Int(
+        0,
+        "Latency for scalar responses from ruby to the "
+        "cu. Not modeled by the scalar response path yet, "
+        "so it defaults to 0 and is currently unused. "
+        "Specified in GPU clock cycles",
+    )
     system = Param.System(Parent.any, "system object")
     cu_id = Param.Int("CU id")
     vrf_to_coalescer_bus_width = Param.Int(
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index 8498ea475e..62cfbf94cf 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -98,6 +98,10 @@ ComputeUnit::ComputeUnit(const Params &p) : ClockedObject(p),
     countPages(p.countPages),
     req_tick_latency(p.mem_req_latency * p.clk_domain->clockPeriod()),
     resp_tick_latency(p.mem_resp_latency * p.clk_domain->clockPeriod()),
+    scalar_req_tick_latency(
+        p.scalar_mem_req_latency * p.clk_domain->clockPeriod()),
+    scalar_resp_tick_latency(
+        p.scalar_mem_resp_latency * p.clk_domain->clockPeriod()),
     _requestorId(p.system->getRequestorId(this, "ComputeUnit")),
     lds(*p.localDataStore), gmTokenPort(name() + ".gmTokenPort", this),
     ldsPort(csprintf("%s-port", name()), this),
@@ -1786,7 +1790,7 @@ ComputeUnit::ScalarDTLBPort::recvTimingResp(PacketPtr pkt)
             = new ComputeUnit::ScalarDataPort::MemReqEvent
                 (computeUnit->scalarDataPort, req_pkt);
     computeUnit->schedule(scalar_mem_req_event, curTick() +
-                          computeUnit->req_tick_latency);
+                          computeUnit->scalar_req_tick_latency);
 
     return true;
 }
diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh
index a080e3dc1a..fcc4468ec1 100644
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -354,6 +354,8 @@ class ComputeUnit : public ClockedObject
 
     Tick req_tick_latency;
     Tick resp_tick_latency;
+    Tick scalar_req_tick_latency;
+    Tick scalar_resp_tick_latency;
 
     /**
      * Number of WFs to schedule to each SIMD. This vector is populated