diff --git a/configs/example/gpufs/amd/AmdGPUOptions.py b/configs/example/gpufs/amd/AmdGPUOptions.py new file mode 100644 index 0000000000..272c417c25 --- /dev/null +++ b/configs/example/gpufs/amd/AmdGPUOptions.py @@ -0,0 +1,127 @@ +# Copyright (c) 2021 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
def addAmdGPUOptions(parser):
    '''
    Register the AMD GPU model command line options on an argparse parser.

    Every option is added with parser.add_argument(); the parser is modified
    in place and nothing is returned. Options without an explicit default
    (--TLB-prefetch, --pf-type, --pf-stride) parse to None when not given.

    parser: the argparse.ArgumentParser (or compatible object) to extend.
    '''
    parser.add_argument("-u", "--num-compute-units", type=int, default=4,
                        help="number of GPU compute units")
    parser.add_argument("--num-cp", type=int, default=0,
                        help="Number of GPU Command Processors (CP)")

    # not super important now, but to avoid putting the number 4 everywhere,
    # make it an option/knob
    parser.add_argument("--cu-per-sqc", type=int, default=4,
                        help="number of CUs sharing an SQC"
                        " (icache, and thus icache TLB)")
    parser.add_argument("--cu-per-scalar-cache", type=int, default=4,
                        help="Number of CUs sharing a scalar cache")
    parser.add_argument("--simds-per-cu", type=int, default=4,
                        help="SIMD units per CU")
    parser.add_argument("--cu-per-sa", type=int, default=4,
                        help="Number of CUs per shader array. This must be a"
                        " multiple of options.cu-per-sqc and"
                        " options.cu-per-scalar-cache")
    parser.add_argument("--sa-per-complex", type=int, default=1,
                        help="Number of shader arrays per complex")
    parser.add_argument("--num-gpu-complexes", type=int, default=1,
                        help="Number of GPU complexes")
    parser.add_argument("--wf-size", type=int, default=64,
                        help="Wavefront size(in workitems)")
    parser.add_argument("--sp-bypass-path-length", type=int, default=4,
                        help="Number of stages of bypass path in vector ALU "
                        "for Single Precision ops")
    parser.add_argument("--dp-bypass-path-length", type=int, default=4,
                        help="Number of stages of bypass path in vector ALU "
                        "for Double Precision ops")
    # issue period per SIMD unit: number of cycles before issuing another
    # vector
    parser.add_argument("--issue-period", type=int, default=4,
                        help="Number of cycles per vector instruction issue"
                        " period")
    parser.add_argument("--glbmem-wr-bus-width", type=int, default=32,
                        help="VGPR to Coalescer (Global Memory) data bus width"
                        " in bytes")
    parser.add_argument("--glbmem-rd-bus-width", type=int, default=32,
                        help="Coalescer to VGPR (Global Memory) data bus width"
                        " in bytes")
    # Currently we only support 1 local memory pipe
    parser.add_argument("--shr-mem-pipes-per-cu", type=int, default=1,
                        help="Number of Shared Memory pipelines per CU")
    # Currently we only support 1 global memory pipe
    parser.add_argument("--glb-mem-pipes-per-cu", type=int, default=1,
                        help="Number of Global Memory pipelines per CU")
    parser.add_argument("--wfs-per-simd", type=int, default=10,
                        help="Number of WF slots per SIMD")

    parser.add_argument("--registerManagerPolicy", type=str,
                        default="static", help="Register manager policy")
    parser.add_argument("--vreg-file-size", type=int, default=2048,
                        help="number of physical vector registers per SIMD")
    parser.add_argument("--vreg-min-alloc", type=int, default=4,
                        help="vector register reservation unit")

    parser.add_argument("--sreg-file-size", type=int, default=2048,
                        help="number of physical scalar registers per SIMD")
    parser.add_argument("--sreg-min-alloc", type=int, default=4,
                        help="scalar register reservation unit")

    parser.add_argument("--bw-scalor", type=int, default=0,
                        help="bandwidth scalor for scalability analysis")
    parser.add_argument("--CPUClock", type=str, default="2GHz",
                        help="CPU clock")
    parser.add_argument("--gpu-clock", type=str, default="1GHz",
                        help="GPU clock")
    parser.add_argument("--cpu-voltage", action="store", type=str,
                        default='1.0V', help="CPU voltage domain")
    parser.add_argument("--gpu-voltage", action="store", type=str,
                        default='1.0V', help="GPU voltage domain")
    parser.add_argument("--CUExecPolicy", type=str, default="OLDEST-FIRST",
                        help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
    parser.add_argument("--LocalMemBarrier", action="store_true",
                        help="Barrier does not wait for writethroughs to"
                        " complete")
    parser.add_argument("--countPages", action="store_true",
                        help="Count Page Accesses and output in"
                        " per-CU output files")
    # Adjacent string literals are concatenated verbatim, so the separating
    # space has to be spelled out in one of the fragments.
    parser.add_argument("--TLB-prefetch", type=int,
                        help="prefetch depth for TLBs")
    parser.add_argument("--pf-type", type=str, help="type of prefetch: "
                        "PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
    parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
    parser.add_argument("--numLdsBanks", type=int, default=32,
                        help="number of physical banks per LDS module")
    parser.add_argument("--ldsBankConflictPenalty", type=int, default=1,
                        help="number of cycles per LDS bank conflict")
    parser.add_argument("--lds-size", type=int, default=65536,
                        help="Size of the LDS in bytes")
    parser.add_argument("--num-hw-queues", type=int, default=10,
                        help="number of hw queues in packet processor")
    parser.add_argument("--reg-alloc-policy", type=str, default="simple",
                        help="register allocation policy (simple/dynamic)")
def addRunFSOptions(parser):
    '''
    Register the options of the GPU full system run script on parser.

    The parser is extended in place through parser.add_argument(); nothing
    is returned.
    '''
    # (flag, keyword arguments) pairs, registered in exactly this order.
    run_fs_options = (
        ("--script",
         dict(default=None,
              help="Script to execute in the simulated system")),
        ("--host-parallel",
         dict(default=False, action="store_true",
              help="Run multiple host threads in KVM mode")),
        ("--disk-image",
         dict(default="",
              help="The boot disk image to mount (/dev/sda)")),
        ("--second-disk",
         dict(default=None,
              help="The second disk image to mount (/dev/sdb)")),
        ("--kernel",
         dict(default=None, help="Linux kernel to boot")),
        ("--gpu-rom",
         dict(default=None, help="GPU BIOS to load")),
        ("--gpu-mmio-trace",
         dict(default=None, help="GPU MMIO trace to load")),
    )
    for flag, keywords in run_fs_options:
        parser.add_argument(flag, **keywords)


def runGpuFSSystem(args):
    '''
    Build the GPU full system described by args and run the simulation.

    Higher level scripts that model one specific device may call this
    function directly; such scripts typically hard code some of the args,
    and those are not meant to be changed by the user.

    args.num_sqc and args.num_scalar_cache are overwritten here before the
    system is built.
    '''

    def shared_cache_count(cus_per_cache):
        # Round up the number of caches needed for all compute units.
        return int(math.ceil(float(args.num_compute_units) / cus_per_cache))

    # These are used by the protocols. They should not be set by the user.
    args.num_sqc = shared_cache_count(args.cu_per_sqc)
    args.num_scalar_cache = shared_cache_count(args.cu_per_scalar_cache)

    gpu_system = makeGpuFSSystem(args)

    # The Root object is not referenced again below; it is created before
    # m5.instantiate() is called.
    root = Root(full_system=True,
                system=gpu_system,
                time_sync_enable=True,
                time_sync_period='1000us')

    if args.script is not None:
        gpu_system.readfile = args.script

    m5.instantiate()

    print("Running the simulation")
    exit_event = m5.simulate(args.abs_max_tick)

    final_tick, exit_cause = m5.curTick(), exit_event.getCause()
    print('Exiting @ tick %i because %s' % (final_tick, exit_cause))


if __name__ == "__m5_main__":
    # Collect the gpufs, common, ruby, amdgpu, and gpu tlb options on a single
    # parser, then hand the parsed result to the runner.
    parser = argparse.ArgumentParser()
    addRunFSOptions(parser)
    Options.addCommonOptions(parser)
    Ruby.define_options(parser)
    AmdGPUOptions.addAmdGPUOptions(parser)
    GPUTLBOptions.tlb_options(parser)

    args = parser.parse_args()

    runGpuFSSystem(args)
def createGPU(system, args):
    '''
    Build the GPU Shader and all of its compute units from args.

    One ComputeUnit is created per args.num_compute_units. Each compute unit
    gets, per SIMD (args.simds_per_cu), one wavefront per wavefront slot, a
    vector and a scalar register file with their pool managers, a
    RegisterManager, and an LdsState that is wired to the CU through the
    CU's ldsBus. The shader is parented to system under the name "Shader"
    and uses system.cpu[0] as its cpu_pointer.

    system: the system object; system.clk_domain and system.cpu[0] are read.
    args: parsed command line options (AMD GPU and GPU TLB options).

    Returns the configured Shader.
    '''
    # Resolve the pool manager class once, up front, so that an unsupported
    # policy is reported instead of silently producing a RegisterManager
    # with no pool managers at all.
    if args.reg_alloc_policy == "simple":
        pool_manager_cls = SimplePoolManager
    elif args.reg_alloc_policy == "dynamic":
        pool_manager_cls = DynPoolManager
    else:
        from m5.util import fatal
        fatal("Unknown register allocation policy '%s'"
              " (expected 'simple' or 'dynamic')", args.reg_alloc_policy)

    shader = Shader(n_wf=args.wfs_per_simd,
                    timing=True,
                    clk_domain=system.clk_domain)

    # VIPER GPU protocol implements release consistency at GPU side. So,
    # we make their writes visible to the global memory and should read
    # from global memory during kernel boundary. The pipeline initiates
    # (or do not initiate) the acquire/release operation depending on
    # these impl_kern_launch_acq and impl_kern_end_rel flags. The flag=true
    # means pipeline initiates a acquire/release operation at kernel launch/end
    # VIPER protocol is write-through based, and thus only impl_kern_launch_acq
    # needs to set.
    shader.impl_kern_launch_acq = True
    shader.impl_kern_end_rel = False

    # Per-lane TLB is off unless explicitly requested
    per_lane = args.TLB_config == "perLane"

    # List of compute units; one GPU can have multiple compute units
    compute_units = []
    for i in range(args.num_compute_units):
        lds = LdsState(banks=args.numLdsBanks,
                       bankConflictPenalty=args.ldsBankConflictPenalty,
                       size=args.lds_size)
        cu = ComputeUnit(cu_id=i, perLaneTLB=per_lane,
                         num_SIMDs=args.simds_per_cu,
                         wf_size=args.wf_size,
                         spbypass_pipe_length=args.sp_bypass_path_length,
                         dpbypass_pipe_length=args.dp_bypass_path_length,
                         issue_period=args.issue_period,
                         coalescer_to_vrf_bus_width=args.glbmem_rd_bus_width,
                         vrf_to_coalescer_bus_width=args.glbmem_wr_bus_width,
                         num_global_mem_pipes=args.glb_mem_pipes_per_cu,
                         num_shared_mem_pipes=args.shr_mem_pipes_per_cu,
                         n_wf=args.wfs_per_simd,
                         execPolicy=args.CUExecPolicy,
                         localMemBarrier=args.LocalMemBarrier,
                         countPages=args.countPages,
                         localDataStore=lds)
        compute_units.append(cu)

        wavefronts = []
        vrfs = []
        vrf_pool_mgrs = []
        srfs = []
        srf_pool_mgrs = []
        for j in range(args.simds_per_cu):
            for k in range(shader.n_wf):
                wavefronts.append(Wavefront(simdId=j, wf_slot_id=k,
                                            wf_size=args.wf_size))

            vrf_pool_mgrs.append(
                pool_manager_cls(pool_size=args.vreg_file_size,
                                 min_alloc=args.vreg_min_alloc))
            # The scalar pool uses the scalar reservation unit
            # (--sreg-min-alloc), not the vector one.
            srf_pool_mgrs.append(
                pool_manager_cls(pool_size=args.sreg_file_size,
                                 min_alloc=args.sreg_min_alloc))

            vrfs.append(VectorRegisterFile(simd_id=j, wf_size=args.wf_size,
                                           num_regs=args.vreg_file_size))

            srfs.append(ScalarRegisterFile(simd_id=j, wf_size=args.wf_size,
                                           num_regs=args.sreg_file_size))

        cu.wavefronts = wavefronts
        cu.vector_register_file = vrfs
        cu.scalar_register_file = srfs
        cu.register_manager = \
            RegisterManager(policy=args.registerManagerPolicy,
                            vrf_pool_managers=vrf_pool_mgrs,
                            srf_pool_managers=srf_pool_mgrs)
        # Prefetch parameters are only overridden when a non-zero prefetch
        # depth was given on the command line.
        if args.TLB_prefetch:
            cu.prefetch_depth = args.TLB_prefetch
            cu.prefetch_prev_type = args.pf_type

        # Attach the LDS and the CU to the bus (actually a Bridge)
        cu.ldsPort = cu.ldsBus.cpu_side_port
        cu.ldsBus.mem_side_port = cu.localDataStore.cuPort

    # Attach compute units to GPU
    shader.CUs = compute_units

    shader.cpu_pointer = system.cpu[0]
    shader.eventq_index = 0
    shader.set_parent(system, "Shader")

    return shader


def connectGPU(system, args):
    '''
    Create the AMDGPUDevice and attach it to the system's south bridge.

    The device is created with pci_bus=0, pci_dev=8, pci_func=0 and stored as
    system.pc.south_bridge.gpu. Its MMIO trace file and ROM binary are taken
    from args.gpu_mmio_trace and args.gpu_rom.
    '''
    system.pc.south_bridge.gpu = AMDGPUDevice(pci_func=0, pci_dev=8, pci_bus=0)

    system.pc.south_bridge.gpu.trace_file = args.gpu_mmio_trace
    system.pc.south_bridge.gpu.rom_binary = args.gpu_rom
def makeGpuFSSystem(args):
    '''
    Build a Linux X86 full system with an AMD GPU attached and return it.

    The function creates the base system with makeLinuxX86System(), the
    clock/voltage domains, the CPUs, the GPU shader (createGPU) and PCI
    device (connectGPU), the HSA packet processor, dispatcher and command
    processor, and the Ruby memory system, and then binds the CPU and GPU
    ports to the Ruby sequencers.

    Besides the regular command line options, args must already carry
    num_sqc and num_scalar_cache; both are read when computing the index of
    the first GPU port.

    panic() is called if args.mem_size is smaller than 2GB.
    '''
    # Boot options are standard gem5 options plus:
    # - Framebuffer device emulation 0 to reduce driver code paths.
    # - Blacklist amdgpu as it cannot (currently) load in KVM CPU.
    # - Blacklist psmouse as amdgpu driver adds proprietary commands that
    #   cause gem5 to panic.
    boot_options = ['earlyprintk=ttyS0', 'console=ttyS0,9600',
                    'lpj=7999923', 'root=/dev/sda1',
                    'drm_kms_helper.fbdev_emulation=0',
                    'modprobe.blacklist=amdgpu',
                    'modprobe.blacklist=psmouse']
    cmdline = ' '.join(boot_options)

    if MemorySize(args.mem_size) < MemorySize('2GB'):
        panic("Need at least 2GB of system memory to load amdgpu module")

    # Use the common FSConfig to setup a Linux X86 System
    (TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
    bm = SysConfig(disks=[args.disk_image], mem=args.mem_size)
    # NOTE(review): the positional True is presumably the "Ruby" flag of
    # makeLinuxX86System -- confirm against common/FSConfig.py.
    system = makeLinuxX86System(test_mem_mode, args.num_cpus, bm, True,
                                cmdline=cmdline)
    system.workload.object_file = binary(args.kernel)

    # Set the cache line size for the entire system.
    system.cache_line_size = args.cacheline_size

    # Create a top-level voltage and clock domain.
    system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
    system.clk_domain = SrcClockDomain(clock = args.sys_clock,
                                       voltage_domain = system.voltage_domain)

    # Create a CPU voltage and clock domain.
    system.cpu_voltage_domain = VoltageDomain()
    system.cpu_clk_domain = SrcClockDomain(clock = args.cpu_clock,
                                           voltage_domain =
                                           system.cpu_voltage_domain)

    # Create specified number of CPUs. GPUFS really only needs one.
    system.cpu = [TestCPUClass(clk_domain=system.cpu_clk_domain, cpu_id=i)
                  for i in range(args.num_cpus)]

    # A KVM VM object is only needed when either CPU class is a KVM CPU.
    if ObjectList.is_kvm_cpu(TestCPUClass) or \
        ObjectList.is_kvm_cpu(FutureClass):
        system.kvm_vm = KvmVM()

    # Create AMDGPU and attach to southbridge
    shader = createGPU(system, args)
    connectGPU(system, args)

    # This arbitrary address is something in the X86 I/O hole
    hsapp_gpu_map_paddr = 0xe00000000
    gpu_hsapp = HSAPacketProcessor(pioAddr=hsapp_gpu_map_paddr,
                                   numHWQueues=args.num_hw_queues)
    dispatcher = GPUDispatcher()
    gpu_cmd_proc = GPUCommandProcessor(hsapp=gpu_hsapp,
                                       dispatcher=dispatcher)
    shader.dispatcher = dispatcher
    shader.gpu_cmd_proc = gpu_cmd_proc

    # GPU, HSAPP, and GPUCommandProc are DMA devices
    system._dma_ports.append(gpu_hsapp)
    system._dma_ports.append(gpu_cmd_proc)
    system._dma_ports.append(system.pc.south_bridge.gpu)

    # All three devices receive their PIO accesses from the I/O bus.
    gpu_hsapp.pio = system.iobus.mem_side_ports
    gpu_cmd_proc.pio = system.iobus.mem_side_ports
    system.pc.south_bridge.gpu.pio = system.iobus.mem_side_ports

    # Create Ruby system using Ruby.py for now
    # NOTE(review): the positional True is presumably the full_system flag of
    # Ruby.create_system -- confirm against ruby/Ruby.py.
    Ruby.create_system(args, True, system, system.iobus,
                       system._dma_ports)

    # Create a separate clock domain for Ruby
    system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
                                            voltage_domain = system.voltage_domain)

    # At this point system.cpu only holds the CPU cores; the shader is
    # appended after this loop.
    for (i, cpu) in enumerate(system.cpu):
        #
        # Tie the cpu ports to the correct ruby system ports
        #
        cpu.clk_domain = system.cpu_clk_domain
        cpu.createThreads()
        cpu.createInterruptController()

        system.ruby._cpu_ports[i].connectCpuPorts(cpu)

    # The shader core will be whatever is after the CPU cores are accounted for
    shader_idx = args.num_cpus
    system.cpu.append(shader)

    # Index of the first GPU port in system.ruby._cpu_ports. This assumes the
    # GPU ports (one per CU, one per SQC, one per scalar cache and two per
    # command processor) are the last entries of the list -- TODO confirm
    # against the Ruby protocol configuration.
    gpu_port_idx = len(system.ruby._cpu_ports) \
                   - args.num_compute_units - args.num_sqc \
                   - args.num_scalar_cache
    gpu_port_idx = gpu_port_idx - args.num_cp * 2

    # Connect token ports. For this we need to search through the list of all
    # sequencers, since the TCP coalescers will not necessarily be first. Only
    # TCP coalescers use a token port for back pressure.
    token_port_idx = 0
    for i in range(len(system.ruby._cpu_ports)):
        if isinstance(system.ruby._cpu_ports[i], VIPERCoalescer):
            system.cpu[shader_idx].CUs[token_port_idx].gmTokenPort = \
                system.ruby._cpu_ports[i].gmTokenPort
            token_port_idx += 1

    wavefront_size = args.wf_size
    for i in range(args.num_compute_units):
        # The pipeline issues wavefront_size number of uncoalesced requests
        # in one GPU issue cycle. Hence wavefront_size mem ports.
        for j in range(wavefront_size):
            system.cpu[shader_idx].CUs[i].memory_port[j] = \
                system.ruby._cpu_ports[gpu_port_idx].in_ports[j]
        # One Ruby port per compute unit.
        gpu_port_idx += 1

    for i in range(args.num_compute_units):
        # Every group of cu_per_sqc compute units shares one port; advance
        # to the next port at each group boundary.
        if i > 0 and not i % args.cu_per_sqc:
            gpu_port_idx += 1
        system.cpu[shader_idx].CUs[i].sqc_port = \
            system.ruby._cpu_ports[gpu_port_idx].in_ports
    # Step past the last port used by the loop above.
    gpu_port_idx = gpu_port_idx + 1

    for i in range(args.num_compute_units):
        # Same grouping scheme as above, with cu_per_scalar_cache compute
        # units per port.
        if i > 0 and not i % args.cu_per_scalar_cache:
            gpu_port_idx += 1
        system.cpu[shader_idx].CUs[i].scalar_port = \
            system.ruby._cpu_ports[gpu_port_idx].in_ports
    # gpu_port_idx is not read again after this increment.
    gpu_port_idx = gpu_port_idx + 1

    return system