From dff879cf21ee609cca3662073cd89cb9322146be Mon Sep 17 00:00:00 2001 From: vramadas95 Date: Thu, 10 Nov 2022 20:42:25 -0600 Subject: [PATCH] configs, gpu-compute: Add configurable L1 scalar latencies Previously the scalar cache path used the same latency parameter as the vector cache path for memory requests. This commit adds new parameters for the scalar cache path latencies. This commit also modifies the model to use the new latency parameter to set the memory request latency in the scalar cache. The new parameters are '--scalar-mem-req-latency' and '--scalar-mem-resp-latency' and are set to default values of 50 and 0, respectively. Change-Id: I7483f780f2fc0cfbc320ed1fd0c2ee3e2dfc7af2 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65511 Reviewed-by: Matt Sinclair Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro Maintainer: Matt Sinclair --- configs/example/apu_se.py | 17 +++++++++++++++++ src/gpu-compute/GPU.py | 13 +++++++++++++ src/gpu-compute/compute_unit.cc | 6 +++++- src/gpu-compute/compute_unit.hh | 2 ++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index b33daa5b39..39def024fc 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -275,6 +275,21 @@ parser.add_argument( default=50, help="Latency for responses from ruby to the cu.", ) +parser.add_argument( + "--scalar-mem-req-latency", + type=int, + default=50, + help="Latency for scalar requests from the cu to ruby.", +) +parser.add_argument( + "--scalar-mem-resp-latency", + type=int, + # Set to 0 as the scalar cache response path does not model + # response latency yet and this parameter is currently not used + default=0, + help="Latency for scalar responses from ruby to the cu.", +) + parser.add_argument( "--TLB-prefetch", type=int, help="prefetch depth for" "TLBs" ) @@ -463,6 +478,8 @@ for i in range(n_cu): vrf_lm_bus_latency=args.vrf_lm_bus_latency, 
mem_req_latency=args.mem_req_latency, mem_resp_latency=args.mem_resp_latency, + scalar_mem_req_latency=args.scalar_mem_req_latency, + scalar_mem_resp_latency=args.scalar_mem_resp_latency, localDataStore=LdsState( banks=args.numLdsBanks, bankConflictPenalty=args.ldsBankConflictPenalty, diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py index 517d1801c0..0fdc0b75a7 100644 --- a/src/gpu-compute/GPU.py +++ b/src/gpu-compute/GPU.py @@ -178,6 +178,19 @@ class ComputeUnit(ClockedObject): "TCP and cu as well as TCP data array " "access. Specified in GPU clock cycles", ) + scalar_mem_req_latency = Param.Int( + 50, + "Latency for scalar requests from the cu to ruby. " + "Represents the pipeline to reach the TCP " + "and specified in GPU clock cycles", + ) + scalar_mem_resp_latency = Param.Int( + 50, + "Latency for scalar responses from ruby to the " + "cu. Represents the pipeline between the " + "TCP and cu as well as TCP data array " + "access. Specified in GPU clock cycles", + ) system = Param.System(Parent.any, "system object") cu_id = Param.Int("CU id") vrf_to_coalescer_bus_width = Param.Int( diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index 8498ea475e..62cfbf94cf 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -98,6 +98,10 @@ ComputeUnit::ComputeUnit(const Params &p) : ClockedObject(p), countPages(p.countPages), req_tick_latency(p.mem_req_latency * p.clk_domain->clockPeriod()), resp_tick_latency(p.mem_resp_latency * p.clk_domain->clockPeriod()), + scalar_req_tick_latency( + p.scalar_mem_req_latency * p.clk_domain->clockPeriod()), + scalar_resp_tick_latency( + p.scalar_mem_resp_latency * p.clk_domain->clockPeriod()), _requestorId(p.system->getRequestorId(this, "ComputeUnit")), lds(*p.localDataStore), gmTokenPort(name() + ".gmTokenPort", this), ldsPort(csprintf("%s-port", name()), this), @@ -1786,7 +1790,7 @@ ComputeUnit::ScalarDTLBPort::recvTimingResp(PacketPtr pkt) = new 
ComputeUnit::ScalarDataPort::MemReqEvent (computeUnit->scalarDataPort, req_pkt); computeUnit->schedule(scalar_mem_req_event, curTick() + - computeUnit->req_tick_latency); + computeUnit->scalar_req_tick_latency); return true; } diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh index a080e3dc1a..fcc4468ec1 100644 --- a/src/gpu-compute/compute_unit.hh +++ b/src/gpu-compute/compute_unit.hh @@ -354,6 +354,8 @@ class ComputeUnit : public ClockedObject Tick req_tick_latency; Tick resp_tick_latency; + Tick scalar_req_tick_latency; + Tick scalar_resp_tick_latency; /** * Number of WFs to schedule to each SIMD. This vector is populated