configs, gpu-compute: Add configurable L1 scalar latencies

Previously the scalar cache path used the same latency parameter as the
vector cache path for memory requests. This commit adds new parameters
for the scalar cache path latencies. This commit also modifies the model
to use the new latency parameter to set the memory request latency in
the scalar cache. The new parameters are '--scalar-mem-req-latency' and
'--scalar-mem-resp-latency' and are set to default values of 50 and 0
respectively.

Change-Id: I7483f780f2fc0cfbc320ed1fd0c2ee3e2dfc7af2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65511
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
vramadas95
2022-11-10 20:42:25 -06:00
committed by VISHNU RAMADAS
parent 78b978686c
commit dff879cf21
4 changed files with 37 additions and 1 deletions

View File

@@ -275,6 +275,21 @@ parser.add_argument(
default=50,
help="Latency for responses from ruby to the cu.",
)
# Request latency for the scalar cache path, separate from the
# mem_req_latency option. It is passed on as the compute unit's
# scalar_mem_req_latency parameter, which is specified in GPU clock cycles.
parser.add_argument(
"--scalar-mem-req-latency",
type=int,
default=50,
help="Latency for scalar requests from the cu to ruby.",
)
parser.add_argument(
"--scalar-mem-resp-latency",
type=int,
# Defaults to 0 because the scalar cache response path does not model
# response latency yet, so this parameter is currently not used.
# NOTE(review): the ComputeUnit SimObject parameter of the same name
# declares a default of 50; confirm which default is intended.
default=0,
help="Latency for scalar responses from ruby to the cu.",
)
# Optional TLB prefetch depth. The help text is a single literal: adjacent
# string literals are joined with no separator, which previously produced
# "prefetch depth forTLBs" in the --help output.
parser.add_argument(
    "--TLB-prefetch", type=int, help="prefetch depth for TLBs"
)
@@ -463,6 +478,8 @@ for i in range(n_cu):
vrf_lm_bus_latency=args.vrf_lm_bus_latency,
mem_req_latency=args.mem_req_latency,
mem_resp_latency=args.mem_resp_latency,
scalar_mem_req_latency=args.scalar_mem_req_latency,
scalar_mem_resp_latency=args.scalar_mem_resp_latency,
localDataStore=LdsState(
banks=args.numLdsBanks,
bankConflictPenalty=args.ldsBankConflictPenalty,

View File

@@ -178,6 +178,19 @@ class ComputeUnit(ClockedObject):
"TCP and cu as well as TCP data array "
"access. Specified in GPU clock cycles",
)
# Request latency for the scalar cache path, in GPU clock cycles (default 50).
# The ComputeUnit constructor multiplies it by the clock period to get ticks.
# NOTE(review): the description mentions the TCP, like the vector-path
# parameter text; confirm that wording is intended for the scalar path.
scalar_mem_req_latency = Param.Int(
50,
"Latency for scalar requests from the cu to ruby. "
"Represents the pipeline to reach the TCP "
"and specified in GPU clock cycles",
)
# Response latency for the scalar cache path, in GPU clock cycles.
# The ComputeUnit constructor multiplies it by the clock period to get ticks.
# NOTE(review): the default here is 50, while the --scalar-mem-resp-latency
# command-line option defaults to 0 and is described as currently unused;
# confirm which default is intended.
scalar_mem_resp_latency = Param.Int(
50,
"Latency for scalar responses from ruby to the "
"cu. Represents the pipeline between the "
"TCP and cu as well as TCP data array "
"access. Specified in GPU clock cycles",
)
system = Param.System(Parent.any, "system object")
cu_id = Param.Int("CU id")
vrf_to_coalescer_bus_width = Param.Int(

View File

@@ -98,6 +98,10 @@ ComputeUnit::ComputeUnit(const Params &p) : ClockedObject(p),
countPages(p.countPages),
req_tick_latency(p.mem_req_latency * p.clk_domain->clockPeriod()),
resp_tick_latency(p.mem_resp_latency * p.clk_domain->clockPeriod()),
scalar_req_tick_latency(
p.scalar_mem_req_latency * p.clk_domain->clockPeriod()),
scalar_resp_tick_latency(
p.scalar_mem_resp_latency * p.clk_domain->clockPeriod()),
_requestorId(p.system->getRequestorId(this, "ComputeUnit")),
lds(*p.localDataStore), gmTokenPort(name() + ".gmTokenPort", this),
ldsPort(csprintf("%s-port", name()), this),
@@ -1786,7 +1790,7 @@ ComputeUnit::ScalarDTLBPort::recvTimingResp(PacketPtr pkt)
= new ComputeUnit::ScalarDataPort::MemReqEvent
(computeUnit->scalarDataPort, req_pkt);
computeUnit->schedule(scalar_mem_req_event, curTick() +
computeUnit->req_tick_latency);
computeUnit->scalar_req_tick_latency);
return true;
}

View File

@@ -354,6 +354,8 @@ class ComputeUnit : public ClockedObject
// Memory request latency in ticks: the mem_req_latency parameter
// multiplied by the clock period in the constructor.
Tick req_tick_latency;
// Memory response latency in ticks: the mem_resp_latency parameter
// multiplied by the clock period in the constructor.
Tick resp_tick_latency;
// Scalar memory request latency in ticks: scalar_mem_req_latency times
// the clock period. Added to curTick() when ScalarDTLBPort::recvTimingResp
// schedules the scalar MemReqEvent.
Tick scalar_req_tick_latency;
// Scalar memory response latency in ticks: scalar_mem_resp_latency times
// the clock period.
// NOTE(review): no use of this member is visible in this change; the config
// script describes the scalar response latency as not modelled yet.
Tick scalar_resp_tick_latency;
/**
* Number of WFs to schedule to each SIMD. This vector is populated