diff --git a/configs/example/gem5_library/x86-mi300x-gpu.py b/configs/example/gem5_library/x86-mi300x-gpu.py index 20fa99b9d8..712c185277 100644 --- a/configs/example/gem5_library/x86-mi300x-gpu.py +++ b/configs/example/gem5_library/x86-mi300x-gpu.py @@ -56,6 +56,7 @@ import argparse from gem5.coherence_protocol import CoherenceProtocol from gem5.components.devices.gpus.amdgpu import MI300X +from gem5.components.memory import HBM2Stack from gem5.components.memory.single_channel import SingleChannelDDR4_2400 from gem5.components.processors.cpu_types import CPUTypes from gem5.components.processors.simple_processor import SimpleProcessor @@ -120,7 +121,7 @@ for core in processor.cores: # The GPU must be created first so we can assign CPU-side DMA ports to the # CPU cache hierarchy. -gpu0 = MI300X() +gpu0 = MI300X(gpu_memory=HBM2Stack(size="16GiB")) cache_hierarchy = ViperCPUCacheHierarchy( l1d_size="32KiB", diff --git a/src/python/gem5/components/devices/gpus/amdgpu.py b/src/python/gem5/components/devices/gpus/amdgpu.py index 67ecc6c2c6..0865aa8b17 100644 --- a/src/python/gem5/components/devices/gpus/amdgpu.py +++ b/src/python/gem5/components/devices/gpus/amdgpu.py @@ -33,6 +33,7 @@ from m5.objects import ( ) from ....components.boards.abstract_board import AbstractBoard +from ....components.memory.abstract_memory_system import AbstractMemorySystem from ....prebuilt.viper.gpu_cache_hierarchy import ViperGPUCacheHierarchy from .viper_shader import ViperShader @@ -51,7 +52,16 @@ class BaseViperGPU(SubSystem): def get_gpu_count(cls): return cls._gpu_count - def __init__(self): + def __init__(self, gpu_memory: AbstractMemorySystem): + super().__init__() + if gpu_memory.has_parent(): + raise ValueError( + "`memory` should not have a parent, i.e. 
you should " + "instantiate the gpu memory like gpu_memory = HBM2Stack() " + "and **not** like board.gpu_memory = HBM2Stack()" + ) + self._memory = gpu_memory + # Setup various PCI related parameters self._my_id = self.get_gpu_count() pci_dev = self.next_pci_dev() @@ -74,19 +84,11 @@ class BaseViperGPU(SubSystem): # Connect all PIO buses self._shader.connect_iobus(board.get_io_bus()) - # The System() object in gem5 has a memories parameter which defaults - # to Self.all. This will collect *all* AbstractMemories and connect to - # the CPU side. To avoid this we manually assign the memories param to - # the CPU side memories. We need the MemInterface which is called dram - # in the MemCtrl class even though it might not be modelling dram. - memory = board.get_memory() - cpu_abs_mems = [mem.dram for mem in memory.get_memory_controllers()] - board.memories = cpu_abs_mems - # Make the cache hierarchy. This will create an independent RubySystem # class containing only the GPU caches with no network connection to # the CPU cache hierarchy. self._device.gpu_caches = ViperGPUCacheHierarchy( + gpu_memory=self._memory, tcp_size=self._tcp_size, tcp_assoc=self._tcp_assoc, sqc_size=self._sqc_size, @@ -97,19 +99,10 @@ class BaseViperGPU(SubSystem): tcc_assoc=self._tcc_assoc, tcc_count=self._tcc_count, cu_per_sqc=self._cu_per_sqc, - num_memory_channels=self._num_memory_channels, cache_line_size=self._cache_line_size, shader=self._shader, ) - # Collect GPU memory controllers created in the GPU cache hierarchy. - # First assign them as a child to the device so the SimObject unproxy. - # The device requires the memories parameter to be set as the system - # pointer required by the AbstractMemory class is set by AMDGPUDevice. - self._device.mem_ctrls = self._device.gpu_caches.get_mem_ctrls() - gpu_abs_mems = [mem.dram for mem in self._device.mem_ctrls] - self._device.memories = gpu_abs_mems - # Finally attach to the board. PciDevices default to Parent.any for the # PciHost parameter. 
To make sure this is found we need to connect to # board.pc or a child of board.pc. Historically we place this in the @@ -120,12 +113,20 @@ class BaseViperGPU(SubSystem): # instead of board.pc.south_bridge.gpu_shader.CUs.l1_tlb.gpu_device. gpu_name = f"gpu{self._my_id}" self._device.set_parent(board.pc.south_bridge, gpu_name) + self._device.memory = self._memory + + # Collect the memory interfaces of the GPU memory system. The memory + # was made a child of the device above so SimObjects can unproxy. + # The device requires the memories parameter to be set because the + # system pointer required by AbstractMemory is set by AMDGPUDevice. + self._device.memories = self._memory.get_mem_interfaces() # A scaled down MI210-like device. Defaults to ~1/4th of an MI210. class MI210(BaseViperGPU): def __init__( self, + gpu_memory: AbstractMemorySystem, num_cus: int = 32, cu_per_sqc: int = 4, tcp_size: str = "16KiB", @@ -137,10 +138,9 @@ class MI210(BaseViperGPU): tcc_size: str = "256KiB", tcc_assoc: int = 16, tcc_count: int = 8, - num_memory_channels: int = 8, cache_line_size: int = 64, ): - super().__init__() + super().__init__(gpu_memory=gpu_memory) self._cu_per_sqc = cu_per_sqc self._tcp_size = tcp_size @@ -152,7 +152,6 @@ class MI210(BaseViperGPU): self._tcc_size = tcc_size self._tcc_assoc = tcc_assoc self._tcc_count = tcc_count - self._num_memory_channels = num_memory_channels self._cache_line_size = cache_line_size self._device.device_name = "MI200" @@ -205,6 +204,7 @@ class MI210(BaseViperGPU): class MI300X(BaseViperGPU): def __init__( self, + gpu_memory: AbstractMemorySystem, num_cus: int = 40, cu_per_sqc: int = 4, tcp_size: str = "16KiB", @@ -216,10 +216,9 @@ class MI300X(BaseViperGPU): tcc_size: str = "256KiB", tcc_assoc: int = 16, tcc_count: int = 16, - num_memory_channels: int = 16, cache_line_size: int = 64, ): - super().__init__() + super().__init__(gpu_memory=gpu_memory) self._cu_per_sqc = cu_per_sqc self._tcp_size = tcp_size @@ -231,7 +230,6 @@ 
class MI300X(BaseViperGPU): self._tcc_size = tcc_size self._tcc_assoc = tcc_assoc self._tcc_count = tcc_count - self._num_memory_channels = num_memory_channels self._cache_line_size = cache_line_size self._device.device_name = "MI300X" diff --git a/src/python/gem5/prebuilt/viper/board.py b/src/python/gem5/prebuilt/viper/board.py index 2bf8869b92..a7958016ac 100644 --- a/src/python/gem5/prebuilt/viper/board.py +++ b/src/python/gem5/prebuilt/viper/board.py @@ -121,6 +121,13 @@ class ViperBoard(X86Board): isa.ExtendedState = avx_extended_state isa.FamilyModelStepping = avx_cpu_features + # The System() object in gem5 has a memories parameter which defaults + # to Self.all. This will collect *all* AbstractMemories and connect to + # the CPU side. To avoid this we manually assign the memories param to + # the CPU side memories. We need the MemInterface which is called dram + # in the MemCtrl class even though it might not be modelling dram. + self.memories = self.memory.get_mem_interfaces() + @overrides(KernelDiskWorkload) def get_disk_device(self): return "/dev/sda" diff --git a/src/python/gem5/prebuilt/viper/cpu_cache_hierarchy.py b/src/python/gem5/prebuilt/viper/cpu_cache_hierarchy.py index 6d7d5d90b8..960feff55a 100644 --- a/src/python/gem5/prebuilt/viper/cpu_cache_hierarchy.py +++ b/src/python/gem5/prebuilt/viper/cpu_cache_hierarchy.py @@ -35,7 +35,6 @@ from m5.objects import ( RubyPortProxy, RubySequencer, RubySystem, - SimpleMemory, TreePLRURP, ) diff --git a/src/python/gem5/prebuilt/viper/gpu_cache_hierarchy.py b/src/python/gem5/prebuilt/viper/gpu_cache_hierarchy.py index 1012d679db..93102b6a32 100644 --- a/src/python/gem5/prebuilt/viper/gpu_cache_hierarchy.py +++ b/src/python/gem5/prebuilt/viper/gpu_cache_hierarchy.py @@ -57,11 +57,9 @@ from ...components.cachehierarchies.ruby.caches.viper.sqc import SQCCache from ...components.cachehierarchies.ruby.caches.viper.tcc import TCCCache from ...components.cachehierarchies.ruby.caches.viper.tcp import TCPCache from 
...components.devices.gpus.viper_shader import ViperShader +from ...components.memory.abstract_memory_system import AbstractMemorySystem from ...utils.requires import requires -from .viper_network import ( - SimpleDoubleCrossbar, - SimplePt2Pt, -) +from .viper_network import SimpleDoubleCrossbar class ViperGPUCacheHierarchy(AbstractRubyCacheHierarchy): @@ -75,6 +73,7 @@ class ViperGPUCacheHierarchy(AbstractRubyCacheHierarchy): def __init__( self, + gpu_memory: AbstractMemorySystem, tcp_size: str, tcp_assoc: int, sqc_size: str, @@ -85,7 +84,6 @@ class ViperGPUCacheHierarchy(AbstractRubyCacheHierarchy): tcc_assoc: int, tcc_count: int, cu_per_sqc: int, - num_memory_channels: int, cache_line_size: int, shader: ViperShader, ): @@ -284,33 +282,14 @@ class ViperGPUCacheHierarchy(AbstractRubyCacheHierarchy): self._dma_controllers.append(ctrl) - # Create GPU memories. Currently fixed to HBM2. - mem_type_cls = HBM_2000_4H_1x64 - - # AMDGPUDevice currently tells the driver there is 16GiB for memory. - # Until that is a parameter, this need to be fixed to 16GiB. - gpu_mem_range = AddrRange(0, size="16GiB") - intlv_low_bit = int(math.log(self._cache_line_size, 2)) - intlv_bits = int(math.log(num_memory_channels, 2)) - - for idx in range(num_memory_channels): - addr_range = AddrRange( - gpu_mem_range.start, - size=gpu_mem_range.size(), - intlvHighBit=intlv_low_bit + intlv_bits - 1, - intlvBits=intlv_bits, - intlvMatch=idx, - xorHighBit=0, - ) - - mem_ctrl = MemCtrl(dram=mem_type_cls(range=addr_range)) - self._mem_ctrls.append(mem_ctrl) - + gpu_memory.set_memory_range([AddrRange(0, size=gpu_memory.get_size())]) + self._mem_ctrls = gpu_memory.get_memory_controllers() + for addr_range, port in gpu_memory.get_mem_ports(): dir = ViperGPUDirectory( self.ruby_gpu.network, self._cache_line_size, addr_range, - self._mem_ctrls[idx].port, + port, ) dir.ruby_system = self.ruby_gpu