diff --git a/configs/example/gem5_library/x86-mi300x-gpu.py b/configs/example/gem5_library/x86-mi300x-gpu.py new file mode 100644 index 0000000000..20fa99b9d8 --- /dev/null +++ b/configs/example/gem5_library/x86-mi300x-gpu.py @@ -0,0 +1,155 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +""" +Script to run a full system GPU simulation. 
+ +Usage: +------ +``` +scons build/VEGA_X86/gem5.opt +./build/VEGA_X86/gem5.opt + configs/example/gem5_library/x86-viper-gpu.py + --image + --kernel + --app +``` + +Example: +-------- +``` +./build/VEGA_X86/gem5.opt + configs/example/gem5_library/x86-viper-gpu.py + --image ./gem5-resources/src/x86-ubuntu-gpu-ml/disk-image/x86-ubuntu-gpu-ml + --kernel ./gem5-resources/src/x86-ubuntu-gpu-ml/vmlinux-gpu-ml + --app ./gem5-resources/src/gpu/square/bin.default/square.default +``` +""" + +import argparse + +from gem5.coherence_protocol import CoherenceProtocol +from gem5.components.devices.gpus.amdgpu import MI300X +from gem5.components.memory.single_channel import SingleChannelDDR4_2400 +from gem5.components.processors.cpu_types import CPUTypes +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.isas import ISA +from gem5.prebuilt.viper.board import ViperBoard +from gem5.prebuilt.viper.cpu_cache_hierarchy import ViperCPUCacheHierarchy +from gem5.resources.resource import ( + DiskImageResource, + FileResource, +) +from gem5.simulate.simulator import Simulator +from gem5.utils.requires import requires + +requires( + isa_required=ISA.X86, + coherence_protocol_required=CoherenceProtocol.GPU_VIPER, +) + +# Kernel, disk, and applications are obtained locally. +parser = argparse.ArgumentParser() + +parser.add_argument( + "--image", + type=str, + required=True, + help="Full path to the gem5-resources x86-ubuntu-gpu-ml disk-image.", +) + +parser.add_argument( + "--kernel", + type=str, + required=True, + help="Full path to the gem5-resources vmlinux-gpu-ml kernel.", +) + +parser.add_argument( + "--app", + type=str, + required=True, + help="Path to GPU application, python script, or bash script to run", +) + +parser.add_argument( + "--kvm-perf", + default=False, + action="store_true", + help="Use KVM perf counters to give accurate GPU insts/cycles with KVM", +) + +args = parser.parse_args() + +# stdlib only supports up to 3GiB currently. 
This will need to be expanded in +# the future. +memory = SingleChannelDDR4_2400(size="3GiB") + +# Note: Only KVM and ATOMIC work due to buggy MOESI_AMD_Base protocol. +processor = SimpleProcessor(cpu_type=CPUTypes.KVM, isa=ISA.X86, num_cores=2) + +for core in processor.cores: + if core.is_kvm_core(): + core.get_simobject().usePerf = args.kvm_perf + +# The GPU must be created first so we can assign CPU-side DMA ports to the +# CPU cache hierarchy. +gpu0 = MI300X() + +cache_hierarchy = ViperCPUCacheHierarchy( + l1d_size="32KiB", + l1d_assoc=8, + l1i_size="32KiB", + l1i_assoc=8, + l2_size="1MiB", + l2_assoc=16, + l3_size="16MiB", + l3_assoc=16, +) + +board = ViperBoard( + clk_freq="3GHz", + processor=processor, + memory=memory, + cache_hierarchy=cache_hierarchy, + gpus=[gpu0], +) + +# Example of using a local disk image resource +disk = DiskImageResource(local_path=args.image, root_partition="1") +kernel = FileResource(local_path=args.kernel) + +board.set_kernel_disk_workload( + kernel=kernel, + disk_image=disk, + readfile_contents=board.make_gpu_app(gpu0, args.app), +) + +simulator = Simulator(board=board) +simulator.run() diff --git a/src/python/SConscript b/src/python/SConscript index b7a40c30c8..ab711fb668 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -175,10 +175,34 @@ PySource('gem5.components.cachehierarchies.ruby.caches.mi_example', 'dma_controller.py') PySource('gem5.components.cachehierarchies.ruby.caches.mi_example', 'gem5/components/cachehierarchies/ruby/caches/mi_example/l1_cache.py') +PySource('gem5.components.cachehierarchies.ruby.caches.viper', + 'gem5/components/cachehierarchies/ruby/caches/viper/__init__.py') +PySource('gem5.components.cachehierarchies.ruby.caches.viper', + 'gem5/components/cachehierarchies/ruby/caches/viper/corepair_cache.py') +PySource('gem5.components.cachehierarchies.ruby.caches.viper', + 'gem5/components/cachehierarchies/ruby/caches/viper/directory.py') 
+PySource('gem5.components.cachehierarchies.ruby.caches.viper', + 'gem5/components/cachehierarchies/ruby/caches/viper/dma_controller.py') +PySource('gem5.components.cachehierarchies.ruby.caches.viper', + 'gem5/components/cachehierarchies/ruby/caches/viper/tcp.py') +PySource('gem5.components.cachehierarchies.ruby.caches.viper', + 'gem5/components/cachehierarchies/ruby/caches/viper/sqc.py') +PySource('gem5.components.cachehierarchies.ruby.caches.viper', + 'gem5/components/cachehierarchies/ruby/caches/viper/tcc.py') PySource('gem5.components.cachehierarchies.ruby.topologies', 'gem5/components/cachehierarchies/ruby/topologies/__init__.py') PySource('gem5.components.cachehierarchies.ruby.topologies', 'gem5/components/cachehierarchies/ruby/topologies/simple_pt2pt.py') + +PySource('gem5.components.devices', + 'gem5/components/devices/__init__.py') +PySource('gem5.components.devices.gpus', + 'gem5/components/devices/gpus/__init__.py') +PySource('gem5.components.devices.gpus', + 'gem5/components/devices/gpus/amdgpu.py') +PySource('gem5.components.devices.gpus', + 'gem5/components/devices/gpus/viper_shader.py') + PySource('gem5.components.memory', 'gem5/components/memory/__init__.py') PySource('gem5.components.memory', 'gem5/components/memory/abstract_memory_system.py') PySource('gem5.components.memory', 'gem5/components/memory/dramsim_3.py') @@ -289,6 +313,14 @@ PySource('gem5.prebuilt.riscvmatched', 'gem5/prebuilt/riscvmatched/riscvmatched_processor.py') PySource('gem5.prebuilt.riscvmatched', 'gem5/prebuilt/riscvmatched/riscvmatched_core.py') +PySource('gem5.prebuilt.viper', 'gem5/prebuilt/viper/__init__.py') +PySource('gem5.prebuilt.viper', 'gem5/prebuilt/viper/board.py') +PySource('gem5.prebuilt.viper', + 'gem5/prebuilt/viper/cpu_cache_hierarchy.py') +PySource('gem5.prebuilt.viper', + 'gem5/prebuilt/viper/gpu_cache_hierarchy.py') +PySource('gem5.prebuilt.viper', + 'gem5/prebuilt/viper/viper_network.py') PySource('gem5.resources', 'gem5/resources/__init__.py') 
PySource('gem5.resources', 'gem5/resources/client.py') PySource('gem5.resources', 'gem5/resources/downloader.py') diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/viper/__init__.py b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/viper/corepair_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/corepair_cache.py new file mode 100644 index 0000000000..cc7dcd7c39 --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/corepair_cache.py @@ -0,0 +1,124 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +import math + +from m5.objects import ( + CorePair_Controller, + MessageBuffer, + RubyCache, + TreePLRURP, +) + +from gem5.components.processors.abstract_core import AbstractCore + + +class CorePairCache(CorePair_Controller): + def __init__( + self, + l1i_size: str, + l1i_assoc: int, + l1d_size: str, + l1d_assoc: int, + l2_size: str, + l2_assoc: int, + network, + cache_line_size, + core: AbstractCore, + ): + """Creating CorePair cache controller. Consist of both instruction + and data cache for a pair of L1s and a single L2 cache shared between + them. 
+ """ + super().__init__() + + self.send_evictions = core.requires_send_evicts() + + self.L1Icache = RubyCache( + size=l1i_size, + assoc=l1i_assoc, + replacement_policy=TreePLRURP(), + resourceStalls=False, + dataArrayBanks=2, + tagArrayBanks=2, + dataAccessLatency=1, + tagAccessLatency=1, + ) + + self.L1D0cache = RubyCache( + size=l1d_size, + assoc=l1d_assoc, + replacement_policy=TreePLRURP(), + resourceStalls=False, + dataArrayBanks=2, + tagArrayBanks=2, + dataAccessLatency=1, + tagAccessLatency=1, + ) + + self.L1D1cache = RubyCache( + size=l1d_size, + assoc=l1d_assoc, + replacement_policy=TreePLRURP(), + resourceStalls=False, + dataArrayBanks=2, + tagArrayBanks=2, + dataAccessLatency=1, + tagAccessLatency=1, + ) + + self.L2cache = RubyCache( + size=l2_size, + assoc=l2_assoc, + replacement_policy=TreePLRURP(), + resourceStalls=False, + dataArrayBanks=16, + tagArrayBanks=16, + ) + + self.connectQueues(network) + + def connectQueues(self, network): + self.requestFromCore = MessageBuffer() + self.requestFromCore.out_port = network.in_port + + self.responseFromCore = MessageBuffer() + self.responseFromCore.out_port = network.in_port + + self.unblockFromCore = MessageBuffer() + self.unblockFromCore.out_port = network.in_port + + self.probeToCore = MessageBuffer() + self.probeToCore.in_port = network.out_port + + self.responseToCore = MessageBuffer() + self.responseToCore.in_port = network.out_port + + self.mandatoryQueue = MessageBuffer() + self.triggerQueue = MessageBuffer(ordered=True) diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/viper/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/directory.py new file mode 100644 index 0000000000..2e5c9c1f95 --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/directory.py @@ -0,0 +1,106 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ + +from m5.objects import ( + MessageBuffer, + RubyDirectoryMemory, +) + +from ......utils.override import overrides +from ..abstract_directory import AbstractDirectory + + +class ViperDirectory(AbstractDirectory): + def __init__(self, network, cache_line_size, mem_range, port): + super().__init__(network, cache_line_size) + self.addr_ranges = [mem_range] + self.directory = RubyDirectoryMemory( + block_size=cache_line_size, + ruby_system=network.ruby_system, + ) + # Connect this directory to the memory side. + self.memory_out_port = port + + # Turn off TCC (GPU cache) related parameters + self.noTCCdir = True + self.TCC_select_num_bits = 0 + + # Defaults which must be set + self.CPUonly = False + self.GPUonly = False + self.useL3OnWT = False + self.L2isWB = False + + @overrides(AbstractDirectory) + def connectQueues(self, network): + self.requestFromDMA = MessageBuffer(ordered=True) + self.requestFromDMA.in_port = network.out_port + + self.responseToDMA = MessageBuffer() + self.responseToDMA.out_port = network.in_port + + self.requestFromCores = MessageBuffer(ordered=True) + self.requestFromCores.in_port = network.out_port + + self.responseFromCores = MessageBuffer() + self.responseFromCores.in_port = network.out_port + + self.unblockFromCores = MessageBuffer() + self.unblockFromCores.in_port = network.out_port + + self.probeToCore = MessageBuffer() + self.probeToCore.out_port = network.in_port + + self.responseToCore = MessageBuffer() + self.responseToCore.out_port = network.in_port + + self.triggerQueue = MessageBuffer(ordered=True) + self.L3triggerQueue = MessageBuffer(ordered=True) + + self.requestToMemory = MessageBuffer() + self.responseFromMemory = MessageBuffer() + + +# This is intended to be used on the CPU side +class ViperCPUDirectory(ViperDirectory): + def __init__(self, network, cache_line_size, mem_range, port): + super().__init__(network, cache_line_size, mem_range, port) + + self.CPUonly = True + self.GPUonly = False + + +# This is intended to be 
used on the GPU side +class ViperGPUDirectory(ViperDirectory): + def __init__(self, network, cache_line_size, mem_range, port): + super().__init__(network, cache_line_size, mem_range, port) + + self.CPUonly = False + self.GPUonly = True diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/viper/dma_controller.py b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/dma_controller.py new file mode 100644 index 0000000000..f163840c3c --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/dma_controller.py @@ -0,0 +1,68 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + + +from m5.objects import MessageBuffer + +from ......utils.override import overrides +from ..abstract_dma_controller import AbstractDMAController + + +# There is a controller for GPU and GPU to keep the "version" numbers +# incrementing seperately +class ViperCPUDMAController(AbstractDMAController): + def __init__(self, network, cache_line_size): + super().__init__(network, cache_line_size) + + @overrides(AbstractDMAController) + def connectQueues(self, network): + # A buffer size of 0 means it is an infinite queue. The VIPER + # DMA controller has not been thoroughly tested with finite buffers. + # Test + self.mandatoryQueue = MessageBuffer(buffer_size=0) + self.responseFromDir = MessageBuffer(buffer_size=0) + self.responseFromDir.in_port = network.out_port + self.requestToDir = MessageBuffer(buffer_size=0) + self.requestToDir.out_port = network.in_port + + +class ViperGPUDMAController(AbstractDMAController): + def __init__(self, network, cache_line_size): + super().__init__(network, cache_line_size) + + @overrides(AbstractDMAController) + def connectQueues(self, network): + # A buffer size of 0 means it is an infinite queue. The VIPER + # DMA controller has not been thoroughly tested with finite buffers. 
+ # Test + self.mandatoryQueue = MessageBuffer(buffer_size=0) + self.responseFromDir = MessageBuffer(buffer_size=0) + self.responseFromDir.in_port = network.out_port + self.requestToDir = MessageBuffer(buffer_size=0) + self.requestToDir.out_port = network.in_port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/viper/sqc.py b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/sqc.py new file mode 100644 index 0000000000..835434ff16 --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/sqc.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +from m5.objects import ( + MessageBuffer, + RubyCache, + SQC_Controller, + TreePLRURP, +) + + +class SQCCache(SQC_Controller): + def __init__( + self, + sqc_size: str, + sqc_assoc: int, + network, + cache_line_size, + ): + """Creating SQC cache controller. This is the Icache for GPU devices.""" + + super().__init__() + + self.L1cache = RubyCache( + size=sqc_size, + assoc=sqc_assoc, + dataArrayBanks=8, + tagArrayBanks=8, + dataAccessLatency=1, + tagAccessLatency=1, + resourceStalls=True, + replacement_policy=TreePLRURP(), + ) + + self.connectQueues(network) + + def connectQueues(self, network): + self.requestFromSQC = MessageBuffer(ordered=True) + self.requestFromSQC.out_port = network.in_port + + self.probeToSQC = MessageBuffer(ordered=True) + self.probeToSQC.in_port = network.out_port + + self.responseToSQC = MessageBuffer(ordered=True) + self.responseToSQC.in_port = network.out_port + + self.mandatoryQueue = MessageBuffer() diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/viper/tcc.py b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/tcc.py new file mode 100644 index 0000000000..f38c34bf84 --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/tcc.py @@ -0,0 +1,87 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +from m5.objects import ( + MessageBuffer, + RubyCache, + TCC_Controller, + TreePLRURP, +) + + +class TCCCache(TCC_Controller): + def __init__( + self, + tcc_size: str, + tcc_assoc: int, + network, + cache_line_size, + ): + """Creating TCC cache controller. 
This is the L2 cache for GPU devices.""" + + super().__init__() + + self.L2cache = RubyCache( + size=tcc_size, + assoc=tcc_assoc, + dataArrayBanks=256, + tagArrayBanks=256, + dataAccessLatency=8, + tagAccessLatency=2, + resourceStalls=True, + replacement_policy=TreePLRURP(), + atomicLatency=0, + atomicALUs=64, + ) + + self.connectQueues(network) + + def connectQueues(self, network): + self.requestFromTCP = MessageBuffer(ordered=True) + self.requestFromTCP.in_port = network.out_port + + self.responseToCore = MessageBuffer(ordered=True) + self.responseToCore.out_port = network.in_port + + self.probeFromNB = MessageBuffer() + self.probeFromNB.in_port = network.out_port + + self.responseFromNB = MessageBuffer() + self.responseFromNB.in_port = network.out_port + + self.requestToNB = MessageBuffer(ordered=True) + self.requestToNB.out_port = network.in_port + + self.responseToNB = MessageBuffer() + self.responseToNB.out_port = network.in_port + + self.unblockToNB = MessageBuffer() + self.unblockToNB.out_port = network.in_port + + self.triggerQueue = MessageBuffer(ordered=True) diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/viper/tcp.py b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/tcp.py new file mode 100644 index 0000000000..8323eda6fe --- /dev/null +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/viper/tcp.py @@ -0,0 +1,79 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. 
Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +from m5.objects import ( + MessageBuffer, + RubyCache, + TCP_Controller, + TreePLRURP, +) + + +class TCPCache(TCP_Controller): + def __init__( + self, + tcp_size: str, + tcp_assoc: int, + network, + cache_line_size, + ): + """Creating TCP cache controller. 
This is the L1 cache for GPU devices.""" + + super().__init__() + + self.L1cache = RubyCache( + size=tcp_size, + assoc=tcp_assoc, + dataArrayBanks=16, + tagArrayBanks=16, + dataAccessLatency=4, + tagAccessLatency=1, + resourceStalls=True, + replacement_policy=TreePLRURP(), + ) + + self.connectQueues(network) + + def connectQueues(self, network): + self.requestFromTCP = MessageBuffer(ordered=True) + self.requestFromTCP.out_port = network.in_port + + self.responseFromTCP = MessageBuffer(ordered=True) + self.responseFromTCP.out_port = network.in_port + + self.unblockFromCore = MessageBuffer() + self.unblockFromCore.out_port = network.in_port + + self.probeToTCP = MessageBuffer(ordered=True) + self.probeToTCP.in_port = network.out_port + + self.responseToTCP = MessageBuffer(ordered=True) + self.responseToTCP.in_port = network.out_port + + self.mandatoryQueue = MessageBuffer() diff --git a/src/python/gem5/components/devices/__init__.py b/src/python/gem5/components/devices/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/python/gem5/components/devices/gpus/__init__.py b/src/python/gem5/components/devices/gpus/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/python/gem5/components/devices/gpus/amdgpu.py b/src/python/gem5/components/devices/gpus/amdgpu.py new file mode 100644 index 0000000000..67ecc6c2c6 --- /dev/null +++ b/src/python/gem5/components/devices/gpus/amdgpu.py @@ -0,0 +1,279 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
class BaseViperGPU(SubSystem):
    """Base class for a software-visible VIPER GPU (e.g., a graphics card or
    one chiplet of a larger GPU package).

    Each instantiated GPU draws a unique PCI device number and GPU ID from a
    class-wide counter. Concrete subclasses (e.g., MI210, MI300X) must set
    the cache-geometry attributes (``_tcp_size``, ``_tcp_assoc``, ...) read
    by :meth:`connectGPU`, and must call :meth:`set_shader` before the GPU
    is connected to a board.
    """

    # First PCI device number handed out; GPUs occupy pci_dev 8, 9, ...
    _base_pci_dev = 8
    # Class-wide count of GPUs created so far (across all subclasses).
    _gpu_count = 0
    # Default GPU ID; shadowed by an instance attribute in __init__.
    _my_id = 0

    @classmethod
    def next_pci_dev(cls):
        """Reserve and return the next free PCI device number."""
        cls._gpu_count += 1  # Use count for this particular type
        return cls._base_pci_dev + cls._gpu_count - 1

    @classmethod
    def get_gpu_count(cls):
        """Return the number of GPUs created so far."""
        return cls._gpu_count

    def __init__(self):
        # Initialize the underlying SubSystem SimObject before attaching any
        # children to it. (Fix: the original skipped the super() call, so
        # SubSystem/SimObject initialization never ran.)
        super().__init__()

        # Setup various PCI related parameters. The ID is sampled *before*
        # next_pci_dev() increments the counter, so IDs start at 0 while PCI
        # device numbers start at _base_pci_dev.
        self._my_id = self.get_gpu_count()
        pci_dev = self.next_pci_dev()

        device = AMDGPUDevice(pci_func=0, pci_dev=pci_dev, pci_bus=0)
        self._device = device

    def set_shader(self, shader: ViperShader):
        """Attach the shader (CUs, TLBs, command processor) for this GPU."""
        self._shader = shader

    def get_cpu_dma_ports(self):
        """Return DMA ports that must be wired into the CPU-side Ruby."""
        return self._shader.get_cpu_dma_ports()

    def connectGPU(self, board: AbstractBoard) -> None:
        """Wire this GPU (shader, caches, memories) into ``board``.

        Must be called after set_shader() and after the concrete subclass
        has populated the cache-geometry attributes used below.
        """
        # Connect a CPU pointer. This is only used for SE mode. Any CPU will
        # work, so pick assuming there is at least one
        cpus = board.get_processor()
        self._shader.set_cpu_pointer(cpus.cores[0].core)

        # Connect all PIO buses
        self._shader.connect_iobus(board.get_io_bus())

        # The System() object in gem5 has a memories parameter which defaults
        # to Self.all. This will collect *all* AbstractMemories and connect to
        # the CPU side. To avoid this we manually assign the memories param to
        # the CPU side memories. We need the MemInterface which is called dram
        # in the MemCtrl class even though it might not be modelling dram.
        memory = board.get_memory()
        cpu_abs_mems = [mem.dram for mem in memory.get_memory_controllers()]
        board.memories = cpu_abs_mems

        # Make the cache hierarchy. This will create an independent RubySystem
        # class containing only the GPU caches with no network connection to
        # the CPU cache hierarchy.
        # NOTE(review): the _tcp_size/.../_cache_line_size attributes are set
        # only by concrete subclasses; BaseViperGPU itself never defines them.
        self._device.gpu_caches = ViperGPUCacheHierarchy(
            tcp_size=self._tcp_size,
            tcp_assoc=self._tcp_assoc,
            sqc_size=self._sqc_size,
            sqc_assoc=self._sqc_assoc,
            scalar_size=self._scalar_size,
            scalar_assoc=self._scalar_assoc,
            tcc_size=self._tcc_size,
            tcc_assoc=self._tcc_assoc,
            tcc_count=self._tcc_count,
            cu_per_sqc=self._cu_per_sqc,
            num_memory_channels=self._num_memory_channels,
            cache_line_size=self._cache_line_size,
            shader=self._shader,
        )

        # Collect GPU memory controllers created in the GPU cache hierarchy.
        # First assign them as a child to the device so the SimObject unproxy.
        # The device requires the memories parameter to be set as the system
        # pointer required by the AbstractMemory class is set by AMDGPUDevice.
        self._device.mem_ctrls = self._device.gpu_caches.get_mem_ctrls()
        gpu_abs_mems = [mem.dram for mem in self._device.mem_ctrls]
        self._device.memories = gpu_abs_mems

        # Finally attach to the board. PciDevices default to Parent.any for the
        # PciHost parameter. To make sure this is found we need to connect to
        # board.pc or a child of board.pc. Historically we place this in the
        # south bridge.
        board.pc.south_bridge.gpu_shader = self._shader

        # This is cosmetic so the device shows as board.pc.south_bridge.gpu###
        # instead of board.pc.south_bridge.gpu_shader.CUs.l1_tlb.gpu_device.
        gpu_name = f"gpu{self._my_id}"
        self._device.set_parent(board.pc.south_bridge, gpu_name)
# A scaled down MI210-like device. Defaults to ~1/4th of an MI210.
class MI210(BaseViperGPU):
    """A scaled-down MI210-like GPU device (defaults to ~1/4 of an MI210)."""

    def __init__(
        self,
        num_cus: int = 32,
        cu_per_sqc: int = 4,
        tcp_size: str = "16KiB",
        tcp_assoc: int = 16,
        sqc_size: str = "32KiB",
        sqc_assoc: int = 8,
        scalar_size: str = "32KiB",
        scalar_assoc: int = 8,
        tcc_size: str = "256KiB",
        tcc_assoc: int = 16,
        tcc_count: int = 8,
        num_memory_channels: int = 8,
        cache_line_size: int = 64,
    ):
        super().__init__()

        # Cache geometry consumed later by BaseViperGPU.connectGPU().
        self._cu_per_sqc = cu_per_sqc
        self._tcp_size = tcp_size
        self._tcp_assoc = tcp_assoc
        self._sqc_size = sqc_size
        self._sqc_assoc = sqc_assoc
        self._scalar_size = scalar_size
        self._scalar_assoc = scalar_assoc
        self._tcc_size = tcc_size
        self._tcc_assoc = tcc_assoc
        self._tcc_count = tcc_count
        self._num_memory_channels = num_memory_channels
        self._cache_line_size = cache_line_size

        self._device.device_name = "MI200"

        # PCI IDs for an MI200-family device.
        self._device.DeviceID = 0x740F
        self._device.SubsystemVendorID = 0x1002
        self._device.SubsystemID = 0x0C34

        # Setup device-specific address ranges for various SoC components.
        shader = ViperShader(
            self._my_id, num_cus, cache_line_size, self._device
        )
        self.set_shader(shader)

        # Setup the SDMA engines depending on device. The MMIO base addresses
        # can be found in the driver code under:
        # include/asic_reg/sdmaX/sdmaX_Y_Z_offset.h
        num_sdmas = 5
        sdma_bases = [0x4980, 0x6180, 0x78000, 0x79000, 0x7A000]
        # Fix: size the list from num_sdmas instead of a repeated magic 5.
        sdma_sizes = [0x1000] * num_sdmas

        self._device.sdmas = shader._create_sdmas(sdma_bases, sdma_sizes)

        # Setup the Command Processor's PM4 engines.
        pm4_starts = [0xC000]
        pm4_ends = [0xD000]

        self._device.pm4_pkt_procs = shader._create_pm4s(pm4_starts, pm4_ends)

    def get_driver_command(self, debug: bool = False):
        """Return the shell commands that load the amdgpu driver in the guest.

        :param debug: When True, raise the guest kernel log level first.
        """
        debug_commands = "dmesg -n8\n" if debug else ""

        driver_load_command = (
            "export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH\n"
            "export HSA_ENABLE_INTERRUPT=0\n"
            "export HCC_AMDGPU_TARGET=gfx90a\n"
            f"{debug_commands}\n"
            "dd if=/root/roms/mi200.rom of=/dev/mem bs=1k seek=768 count=128\n"
            "if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then\n"
            ' echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5."\n'
            " /sbin/m5 exit\n"
            "fi\n"
            "modprobe -v amdgpu ip_block_mask=0x6f ppfeaturemask=0 dpm=0 audio=0 ras_enable=0\n"
        )

        return driver_load_command


# Defaults to a single "XCD" (i.e., 1/8th of a full MI300X).
class MI300X(BaseViperGPU):
    """An MI300X-like GPU device (defaults to one XCD, 1/8 of an MI300X)."""

    def __init__(
        self,
        num_cus: int = 40,
        cu_per_sqc: int = 4,
        tcp_size: str = "16KiB",
        tcp_assoc: int = 16,
        sqc_size: str = "32KiB",
        sqc_assoc: int = 8,
        scalar_size: str = "32KiB",
        scalar_assoc: int = 8,
        tcc_size: str = "256KiB",
        tcc_assoc: int = 16,
        tcc_count: int = 16,
        num_memory_channels: int = 16,
        cache_line_size: int = 64,
    ):
        super().__init__()

        # Cache geometry consumed later by BaseViperGPU.connectGPU().
        self._cu_per_sqc = cu_per_sqc
        self._tcp_size = tcp_size
        self._tcp_assoc = tcp_assoc
        self._sqc_size = sqc_size
        self._sqc_assoc = sqc_assoc
        self._scalar_size = scalar_size
        self._scalar_assoc = scalar_assoc
        self._tcc_size = tcc_size
        self._tcc_assoc = tcc_assoc
        self._tcc_count = tcc_count
        self._num_memory_channels = num_memory_channels
        self._cache_line_size = cache_line_size

        self._device.device_name = "MI300X"

        # NOTE(review): these are the MI200-family PCI IDs (same as MI210),
        # presumably intentional until MI300X VBIOS support lands — confirm.
        self._device.DeviceID = 0x740F
        self._device.SubsystemVendorID = 0x1002
        self._device.SubsystemID = 0x0C34

        # Setup device-specific address ranges for various SoC components.
        shader = ViperShader(
            self._my_id, num_cus, cache_line_size, self._device
        )
        self.set_shader(shader)

        # These currently use MI200 values until the MI300X bios is released.
        num_sdmas = 5
        sdma_bases = [0x4980, 0x6180, 0x78000, 0x79000, 0x7A000]
        # Fix: size the list from num_sdmas instead of a repeated magic 5.
        sdma_sizes = [0x1000] * num_sdmas

        self._device.sdmas = shader._create_sdmas(sdma_bases, sdma_sizes)

        # Setup the Command Processor's PM4 engines.
        pm4_starts = [0xC000]
        pm4_ends = [0xD000]

        self._device.pm4_pkt_procs = shader._create_pm4s(pm4_starts, pm4_ends)

    def get_driver_command(self, debug: bool = False):
        """Return the shell commands that load the amdgpu driver in the guest.

        :param debug: When True, raise the guest kernel log level first.
        """
        debug_commands = "dmesg -n8\n" if debug else ""

        driver_load_command = (
            "export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH\n"
            "export HSA_ENABLE_INTERRUPT=0\n"
            "export HCC_AMDGPU_TARGET=gfx942\n"
            'export HSA_OVERRIDE_GFX_VERSION="9.4.2"\n'
            f"{debug_commands}\n"
            "dd if=/root/roms/mi200.rom of=/dev/mem bs=1k seek=768 count=128\n"
            "if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then\n"
            ' echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5."\n'
            " /sbin/m5 exit\n"
            "fi\n"
            "modprobe -v amdgpu ip_block_mask=0x6f ppfeaturemask=0 dpm=0 audio=0 ras_enable=0\n"
        )

        return driver_load_command
class ViperCU(ComputeUnit):
    """A gfx9-like compute unit: 4 SIMDs, register files/caches, an LDS, and
    private vector/scalar/instruction L1 TLBs."""

    def __init__(self, cu_id: int, device: AMDGPUDevice):
        """ComputeUnit object of a gfx9-like compute unit."""
        super().__init__()
        self._device = device

        self.cu_id = cu_id

        # Use in multiple places. Define variables to change once.
        self._vreg_file_size = 2048
        self._sreg_file_size = 2048

        # Latencies, etc. use defaults in src/gpu-compute/GPU.py.
        self.num_SIMDs = 4
        self.n_wf = 8

        # 64 KiB local data store shared by the wavefronts on this CU.
        self.localDataStore = LdsState(
            size=65536,
        )

        # One Wavefront object per (SIMD, wavefront slot) pair.
        self.wavefronts = [
            Wavefront(simdId=j, wf_slot_id=k)
            for j in range(self.num_SIMDs)
            for k in range(self.n_wf)
        ]

        # Per-SIMD vector and scalar register files plus a register cache.
        self.vector_register_file = [
            VectorRegisterFile(simd_id=i, num_regs=self._vreg_file_size)
            for i in range(self.num_SIMDs)
        ]

        self.scalar_register_file = [
            ScalarRegisterFile(simd_id=i, num_regs=self._sreg_file_size)
            for i in range(self.num_SIMDs)
        ]

        self.register_file_cache = [
            RegisterFileCache(simd_id=i) for i in range(self.num_SIMDs)
        ]

        # Static allocation policy; one pool manager per SIMD for each of the
        # vector and scalar register files.
        self.register_manager = RegisterManager(
            policy="static",
            vrf_pool_managers=[
                DynPoolManager(pool_size=self._vreg_file_size, min_alloc=4)
                for _ in range(self.num_SIMDs)
            ],
            srf_pool_managers=[
                DynPoolManager(pool_size=self._sreg_file_size, min_alloc=4)
                for _ in range(self.num_SIMDs)
            ],
        )

        # Route LDS accesses: CU -> ldsBus -> local data store.
        self.ldsPort = self.ldsBus.cpu_side_port
        self.ldsBus.mem_side_port = self.localDataStore.cuPort

        self._create_tlbs()

    def _create_tlbs(self):
        """Create the three private L1 TLBs (vector, scalar, instruction),
        each fronted by its own coalescer."""
        # Vector memory TLB
        self.l1_tlb = VegaGPUTLB(
            gpu_device=self._device,
            size=64,
            assoc=64,
            hitLatency=1,
            missLatency1=750,
            missLatency2=750,
            maxOutstandingReqs=64,
        )

        self.l1_coalescer = VegaTLBCoalescer(tlb_level=1)

        # CU translation requests go through the coalescer into the TLB.
        self.translation_port = self.l1_coalescer.cpu_side_ports
        self.l1_coalescer.mem_side_ports = self.l1_tlb.cpu_side_ports

        # Scalar memory TLB
        self.scalar_tlb = VegaGPUTLB(
            gpu_device=self._device,
            size=64,
            assoc=64,
            hitLatency=1,
            missLatency1=750,
            missLatency2=750,
            maxOutstandingReqs=64,
        )

        self.scalar_coalescer = VegaTLBCoalescer(tlb_level=1)

        self.scalar_tlb_port = self.scalar_coalescer.cpu_side_ports
        self.scalar_coalescer.mem_side_ports = self.scalar_tlb.cpu_side_ports

        # Instruction memory TLB
        self.sqc_tlb = VegaGPUTLB(
            gpu_device=self._device,
            size=64,
            assoc=64,
            hitLatency=1,
            missLatency1=750,
            missLatency2=750,
            maxOutstandingReqs=64,
        )

        self.sqc_coalescer = VegaTLBCoalescer(tlb_level=1)

        self.sqc_tlb_port = self.sqc_coalescer.cpu_side_ports
        self.sqc_coalescer.mem_side_ports = self.sqc_tlb.cpu_side_ports

    def get_tlb_ports(self):
        """Return the mem-side ports of all three L1 TLBs, for connection to
        the shader-level L2 TLB coalescer."""
        return [
            self.l1_tlb.mem_side_ports,
            self.sqc_tlb.mem_side_ports,
            self.scalar_tlb.mem_side_ports,
        ]
class ViperShader(Shader):
    def __init__(
        self,
        shader_id: int,
        num_cus: int,
        cache_line_size: int,
        device: AMDGPUDevice,
    ):
        """
        The shader represents a single software-visible GPU (e.g., a
        graphics card, a chiplet on a GPU, etc.).

        :param shader_id: Index of this GPU; non-zero IDs shift the VBIOS
            and legacy-IO BAR addresses so multiple GPUs do not overlap.
        :param num_cus: Number of compute units to instantiate.
        :param cache_line_size: Cache line size forwarded to the memory
            manager.
        :param device: The PCI-facing AMDGPUDevice this shader backs.
        """
        super().__init__()

        self._shader_id = shader_id
        self._cache_line_size = cache_line_size
        self._device = device

        self.n_wf = 8
        self.timing = True
        # used to track the (many, many) DMA ports
        self._cpu_dma_ports = []
        self._gpu_dma_ports = []

        # The VIPER protocol implements release consistency on the GPU side:
        # writes must become visible to global memory, and reads must come
        # from global memory, at kernel boundaries. These flags control
        # whether the pipeline issues an acquire at kernel launch and a
        # release at kernel end. Because VIPER is write-through, only the
        # kernel-launch acquire needs to be set.
        self.impl_kern_launch_acq = True
        self.impl_kern_end_rel = False

        # Attach compute units to GPU
        self.CUs = [ViperCU(idx, device) for idx in range(num_cus)]

        self._create_tlbs(device)

        # This arbitrary address is something in the X86 I/O hole
        hsapp_gpu_map_paddr = 0xE00000000
        self.dispatcher = GPUDispatcher()
        self.gpu_cmd_proc = GPUCommandProcessor(
            hsapp=HSAPacketProcessor(
                pioAddr=hsapp_gpu_map_paddr,
                numHWQueues=10,
                walker=VegaPagetableWalker(),
            ),
            dispatcher=self.dispatcher,
            walker=VegaPagetableWalker(),
        )
        # Command processor and HSA packet processor DMA into host memory;
        # their page-table walkers translate on the GPU side.
        self._cpu_dma_ports.append(self.gpu_cmd_proc.hsapp.dma)
        self._cpu_dma_ports.append(self.gpu_cmd_proc.dma)

        self._gpu_dma_ports.append(self.gpu_cmd_proc.hsapp.walker.port)
        self._gpu_dma_ports.append(self.gpu_cmd_proc.walker.port)

        self.system_hub = AMDGPUSystemHub()
        self._cpu_dma_ports.append(self.system_hub.dma)

        self._setup_device(device)

    def get_compute_units(self):
        """Return the list of ViperCU objects in this shader."""
        return self.CUs

    def _setup_device(self, device: AMDGPUDevice):
        """Set the device type info on the device connected via PCI."""
        device.cp = self.gpu_cmd_proc
        device.device_ih = AMDGPUInterruptHandler()
        self._cpu_dma_ports.append(device.device_ih.dma)

        # GPU data path
        device.memory_manager = AMDGPUMemoryManager(
            cache_line_size=self._cache_line_size,
        )
        self._gpu_dma_ports.append(device.memory_manager.port)

        self._cpu_dma_ports.append(device.dma)

        # Use the gem5 default of 0x280 OR'd with 0x10 which tells Linux there is
        # a PCI capabilities list to traverse.
        device.Status = 0x0290

        # The PCI capabilities are like a linked list. The list has a memory
        # offset and a capability type ID read by the OS. Make the first
        # capability at 0x80 and set the PXCAP (PCI express) capability to
        # that address. Mark the type ID as PCI express.
        # We leave the next ID of PXCAP blank to end the list.
        device.PXCAPBaseOffset = 0x80
        device.CapabilityPtr = 0x80
        device.PXCAPCapId = 0x10

        # Set bits 7 and 8 in the second PCIe device capabilities register which
        # reports support for PCIe atomics for 32 and 64 bits respectively.
        # Bit 9 for 128-bit compare and swap is not set because the amdgpu driver
        # does not check this.
        device.PXCAPDevCap2 = 0x00000180

        # Set bit 6 to enable atomic requestor, meaning this device can request
        # atomics from other PCI devices.
        device.PXCAPDevCtrl2 = 0x00000040

        # If there are multiple GPUs in the system, make sure the VBIOS region
        # and the legacy IO bar do not overlap with the ranges from other GPUs.
        if self._shader_id != 0:
            device.ExpansionROM = 0xD0000000 + (0x20000 * self._shader_id)
            bar4_addr = 0xF000 + (0x100 * self._shader_id)
            device.BAR4 = PciLegacyIoBar(addr=bar4_addr, size="256B")

    def _create_pm4s(self, pm4_starts: List[int], pm4_ends: List[int]):
        """Create PM4 packet processors, one per (start, end) MMIO range."""
        num_pm4s = len(pm4_starts)

        pm4_procs = [
            PM4PacketProcessor(
                ip_id=i,
                mmio_range=AddrRange(start=pm4_starts[i], end=pm4_ends[i]),
            )
            for i in range(num_pm4s)
        ]

        # PM4 processors DMA into host (CPU-side) memory.
        for pm4_proc in pm4_procs:
            self._cpu_dma_ports.append(pm4_proc.dma)

        return pm4_procs

    def _create_sdmas(self, sdma_bases: List[int], sdma_sizes: List[int]):
        """Create the SDMA engines, one per (mmio_base, mmio_size) pair."""
        num_sdmas = len(sdma_bases)

        sdmas = [
            SDMAEngine(
                walker=VegaPagetableWalker(),
                mmio_base=sdma_bases[i],
                mmio_size=sdma_sizes[i],
            )
            for i in range(num_sdmas)
        ]

        # SDMA engines DMA into host memory; their walkers are GPU-side.
        for sdma in sdmas:
            self._cpu_dma_ports.append(sdma.dma)
            self._gpu_dma_ports.append(sdma.walker.port)

        return sdmas

    def get_cpu_dma_ports(self):
        """Return the DMA ports that target CPU-side (host) memory."""
        return self._cpu_dma_ports

    def get_gpu_dma_ports(self):
        """Return the DMA ports that target GPU-side memory."""
        return self._gpu_dma_ports

    def _create_tlbs(self, device: AMDGPUDevice):
        """Connect per-CU TLBs to the L2/L3 TLBs"""
        self.l2_tlb = VegaGPUTLB(
            gpu_device=device,
            size=4096,
            assoc=64,
            hitLatency=69,
            missLatency1=750,
            missLatency2=750,
            maxOutstandingReqs=64,
        )

        self.l2_coalescer = VegaTLBCoalescer(tlb_level=2)

        self.l3_tlb = VegaGPUTLB(
            gpu_device=device,
            size=8192,
            assoc=64,
            hitLatency=150,
            missLatency1=750,
            missLatency2=750,
            maxOutstandingReqs=64,
        )

        self.l3_coalescer = VegaTLBCoalescer(tlb_level=3)

        # Port flow: [L1s] -> L2 coalescer -> L2 tlb -> L3 coalescer -> L3 tlb
        for cu in self.CUs:
            for port in cu.get_tlb_ports():
                self.l2_coalescer.cpu_side_ports = port
        self.l2_coalescer.mem_side_ports = self.l2_tlb.cpu_side_ports
        self.l2_tlb.mem_side_ports = self.l3_coalescer.cpu_side_ports
        self.l3_coalescer.mem_side_ports = self.l3_tlb.cpu_side_ports

        # The L3 TLB's walker performs the final GPU-side translation fetch.
        self._gpu_dma_ports.append(self.l3_tlb.walker.port)

    def connect_iobus(self, iobus: BaseXBar):
        """Connect the GPU objects to the IO bus."""
        self.gpu_cmd_proc.pio = iobus.mem_side_ports
        self.gpu_cmd_proc.hsapp.pio = iobus.mem_side_ports
        self.system_hub.pio = iobus.mem_side_ports
        self._device.pio = iobus.mem_side_ports
        self._device.device_ih.pio = iobus.mem_side_ports
        for sdma in self._device.sdmas:
            sdma.pio = iobus.mem_side_ports
        for pm4_proc in self._device.pm4_pkt_procs:
            pm4_proc.pio = iobus.mem_side_ports

    def set_cpu_pointer(self, cpu: BaseCPU):
        """Set the CPU pointer for the Shader."""
        self.cpu_pointer = cpu
Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
class ViperBoard(X86Board):
    """
    A derivative of X86Board capable of full system simulation for X86 with a
    GPU device. Provides all the functionality of the X86Board with helper
    methods specific to booting a disk with GPU libraries installed.
    """

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        memory: AbstractMemorySystem,
        cache_hierarchy: AbstractCacheHierarchy,
        gpus: Optional[List[BaseViperGPU]] = None,
    ) -> None:
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            memory=memory,
            cache_hierarchy=cache_hierarchy,
        )
        # (Fix: removed leftover debug print statements.)
        self._gpus = gpus

    def get_devices(self):
        """Return the list of attached GPUs (or None if there are none)."""
        return self._gpus

    @overrides(AbstractBoard)
    def _connect_things(self) -> None:
        super()._connect_things()

        # Wire each GPU into the board after the base board is connected.
        if self._gpus is not None:
            for gpu in self._gpus:
                gpu.connectGPU(self)

    @overrides(KernelDiskWorkload)
    def get_disk_device(self):
        return "/dev/sda"

    @overrides(KernelDiskWorkload)
    def get_default_kernel_args(self) -> List[str]:
        # The regular parameters used with gem5 plus (1) fbdev_emulation=0
        # to disable having to implement this functionality, (2) blacklist
        # amdgpu because we need to copy the VBIOS into memory first, and (3)
        # blacklist psmouse as amdgpu driver adds new mouse commands which
        # gem5 does not implement and they do not seem to be documented.
        return [
            "earlyprintk=ttyS0",
            "console=ttyS0",
            "lpj=7999923",
            "root={root_value}",
            "drm_kms_helper.fbdev_emulation=0",
            "modprobe.blacklist=amdgpu",
            "modprobe.blacklist=psmouse",
        ]

    # Replicate the capability of the old GPUFS config, which embeds a binary
    # application or script into a bash script setting up the environment and
    # loading the GPU driver.
    def make_gpu_app(self, gpu: BaseViperGPU, app: str, debug: bool = False):
        """Build a guest shell script that loads the GPU driver, decodes the
        base64-embedded application, runs it, and exits gem5.

        :param gpu: The GPU whose driver-load commands should be prepended.
        :param app: Host path to the application/script to embed.
        :param debug: Forwarded to the GPU's get_driver_command().
        """
        driver_load_command = gpu.get_driver_command(debug=debug)

        with open(os.path.abspath(app), "rb") as binfile:
            encoded_bin = base64.b64encode(binfile.read()).decode()

        # NOTE(review): the literal "{}" below is passed verbatim to the app
        # as an argument; it looks like a leftover .format() placeholder from
        # the old GPUFS config — confirm whether app options were intended.
        application_command = (
            f'echo "{encoded_bin}" | base64 -d > myapp\n'
            "chmod +x myapp\n"
            "./myapp {}\n"
            "/sbin/m5 exit\n"
        )

        return driver_load_command + application_command
class ViperCPUCacheHierarchy(AbstractRubyCacheHierarchy):
    """
    The VIPER CPU cache hierarchy creates CPU-side Ruby caches and connects
    the nodes using a simple point-to-point topology.
    """

    def __init__(
        self,
        l1d_size: str,
        l1d_assoc: int,
        l1i_size: str,
        l1i_assoc: int,
        l2_size: str,
        l2_assoc: int,
        l3_size: str,
        l3_assoc: int,
    ):
        """
        :param l1d_size: Size of each per-core-pair L1 data cache.
        :param l1d_assoc: Associativity of the L1 data caches.
        :param l1i_size: Size of each L1 instruction cache.
        :param l1i_assoc: Associativity of the L1 instruction caches.
        :param l2_size: Size of each core-pair L2 cache.
        :param l2_assoc: Associativity of the L2 caches.
        :param l3_size: Size of the L3 cache attached to each directory.
        :param l3_assoc: Associativity of the L3 caches.
        """
        super().__init__()

        self._l1d_size = l1d_size
        self._l1d_assoc = l1d_assoc
        self._l1i_size = l1i_size
        self._l1i_assoc = l1i_assoc
        self._l2_size = l2_size
        self._l2_assoc = l2_assoc
        self._l3_size = l3_size
        self._l3_assoc = l3_assoc

        self.ruby_system = RubySystem()

    @overrides(AbstractCacheHierarchy)
    def incorporate_cache(self, board: ViperBoard) -> None:
        requires(coherence_protocol_required=CoherenceProtocol.GPU_VIPER)

        # Ruby networks for CPU
        self.ruby_system.network = SimplePt2Pt(self.ruby_system)

        # MOESI_AMD_Base uses 5 virtual networks.
        self.ruby_system.number_of_virtual_networks = 5
        self.ruby_system.network.number_of_virtual_networks = 5

        # There is a single local list of all of the controllers to make it
        # easier to connect everything to the CPU network. This can be
        # customized depending on the topology/network requirements.
        # Create one controller for each L1 cache (and the cache mem obj.)
        # Create a single directory controller (Really the memory cntrl).
        self._controllers = []

        cores = board.get_processor().get_cores()
        num_cores = len(cores)
        # Cores are grouped into pairs, each pair sharing one CorePairCache.
        for i in range(0, num_cores, 2):
            cache = CorePairCache(
                l1d_size=self._l1d_size,
                l1d_assoc=self._l1d_assoc,
                l1i_size=self._l1i_size,
                l1i_assoc=self._l1i_assoc,
                l2_size=self._l2_size,
                l2_assoc=self._l2_assoc,
                network=self.ruby_system.network,
                cache_line_size=board.get_cache_line_size(),
                core=cores[i],
            )

            cache.version = i // 2
            cache.ruby_system = self.ruby_system
            cache.clk_domain = board.get_clock_domain()

            # One sequencer per core in the pair, each backed by its own
            # L1D bank of the pair cache.
            cache.sequencer = RubySequencer(
                version=i,
                dcache=cache.L1D0cache,
                ruby_system=self.ruby_system,
                coreid=0,
                is_cpu_sequencer=True,
                clk_domain=board.get_clock_domain(),
            )

            cache.sequencer1 = RubySequencer(
                version=i + 1,
                dcache=cache.L1D1cache,
                ruby_system=self.ruby_system,
                coreid=1,
                is_cpu_sequencer=True,
                clk_domain=board.get_clock_domain(),
            )

            cache.sequencer.connectIOPorts(board.get_io_bus())
            cache.sequencer1.connectIOPorts(board.get_io_bus())

            cores[i].connect_icache(cache.sequencer.in_ports)
            cores[i].connect_dcache(cache.sequencer.in_ports)

            cores[i].connect_walker_ports(
                cache.sequencer.in_ports, cache.sequencer.in_ports
            )

            # Connect the interrupt ports
            int_req_port = cache.sequencer.interrupt_out_port
            int_resp_port = cache.sequencer.in_ports
            cores[i].connect_interrupt(int_req_port, int_resp_port)

            # The second core of the pair only exists when num_cores is even
            # (or this is not the trailing pair).
            if i + 1 < num_cores:
                cores[i + 1].connect_icache(cache.sequencer1.in_ports)
                cores[i + 1].connect_dcache(cache.sequencer1.in_ports)

                # NOTE(review): the first walker port of the odd core is
                # connected to sequencer (core 0's), not sequencer1 —
                # confirm this asymmetry is intended.
                cores[i + 1].connect_walker_ports(
                    cache.sequencer.in_ports, cache.sequencer1.in_ports
                )

                # Connect the interrupt ports
                cores[i + 1].connect_interrupt(int_req_port, int_resp_port)

            self._controllers.append(cache)

        # Create the CPU directory controllers
        self._directory_controllers = []

        # Automatically determine the numa bit. This can be changed to
        # increase the number of bytes to each memory channel before
        # going to the next channels.
        # NOTE(review): neither value is consumed below yet — presumably
        # intended for directory address interleaving. (Fix: the block-size
        # computation was a natural log; use log2 like dir_bits.)
        dir_bits = int(math.log(len(board.get_mem_ports()), 2))
        block_size_bits = int(math.log2(board.get_cache_line_size()))

        for addr_range, port in board.get_mem_ports():
            directory = ViperCPUDirectory(
                self.ruby_system.network,
                board.get_cache_line_size(),
                addr_range,
                port,
            )
            directory.ruby_system = self.ruby_system
            directory.version = len(self._directory_controllers)
            self._directory_controllers.append(directory)

            directory.L3CacheMemory = RubyCache(
                size=self._l3_size,
                assoc=self._l3_assoc,
                replacement_policy=TreePLRURP(),
                resourceStalls=False,
                dataArrayBanks=16,
                tagArrayBanks=16,
                dataAccessLatency=20,
                tagAccessLatency=15,
            )

        # Create the DMA Controllers, if required.
        self._dma_controllers = []
        if board.has_dma_ports():
            dma_ports = board.get_dma_ports()
            for i, port in enumerate(dma_ports):
                ctrl = ViperCPUDMAController(
                    self.ruby_system.network, board.get_cache_line_size()
                )
                ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port)

                ctrl.ruby_system = self.ruby_system
                ctrl.dma_sequencer.ruby_system = self.ruby_system

                self._dma_controllers.append(ctrl)

        # Create DMA Controllers required for any devices in the system.
        device_dmas = []
        if board.get_devices() is not None:
            for device in board.get_devices():
                device_dmas += device.get_cpu_dma_ports()

        for port in device_dmas:
            ctrl = ViperCPUDMAController(
                self.ruby_system.network, board.get_cache_line_size()
            )
            # Versions continue after the board DMA sequencers created above.
            ctrl.dma_sequencer = DMASequencer(
                version=len(self._dma_controllers), in_ports=port
            )

            ctrl.ruby_system = self.ruby_system
            ctrl.dma_sequencer.ruby_system = self.ruby_system

            self._dma_controllers.append(ctrl)

        # Number of sequencers = one per core pair + one per DMA
        self.ruby_system.num_of_sequencers = len(self._controllers) * 2 + len(
            self._dma_controllers
        )

        # Assign the controllers to their parent objects.
        self.ruby_system.controllers = self._controllers
        self.ruby_system.directory_controllers = self._directory_controllers

        if len(self._dma_controllers) != 0:
            self.ruby_system.dma_controllers = self._dma_controllers

        # Connect the controllers using the network topology
        self.ruby_system.network.connect(
            self._controllers
            + self._directory_controllers
            + self._dma_controllers
        )
        self.ruby_system.network.setup_buffers()

        # Set up a proxy port for the system_port. Used for load binaries and
        # other functional-only things.
        self.ruby_system.sys_port_proxy = RubyPortProxy(
            ruby_system=self.ruby_system
        )
        board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports)
Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
class ViperGPUCacheHierarchy(AbstractRubyCacheHierarchy):
    """The VIPER GPU-side Ruby cache hierarchy for the prebuilt Viper board.

    Builds the complete GPU protocol side of the system:

    * one TCP (vector L1 data cache) per compute unit,
    * one SQC (instruction cache) and one scalar cache shared by every
      ``cu_per_sqc`` compute units,
    * ``tcc_count`` TCC (GPU L2) banks,
    * one directory (with an attached L3 cache) plus one HBM2 memory
      controller per memory channel, and
    * one DMA controller per shader DMA port,

    all joined by a two-level crossbar network (``SimpleDoubleCrossbar``).
    """

    # Class-level counter: sequencer versions must be globally unique
    # across every sequencer/coalescer created by this hierarchy.
    _seqs = 0

    @classmethod
    def seqCount(cls):
        """Return the next globally-unique sequencer version number."""
        # Use seqCount, not a per-instance counter, since we need a global
        # count across all controllers.
        cls._seqs += 1
        return cls._seqs - 1

    def __init__(
        self,
        tcp_size: str,
        tcp_assoc: int,
        sqc_size: str,
        sqc_assoc: int,
        scalar_size: str,
        scalar_assoc: int,
        tcc_size: str,
        tcc_assoc: int,
        tcc_count: int,
        cu_per_sqc: int,
        num_memory_channels: int,
        cache_line_size: int,
        shader: ViperShader,
    ):
        """
        :param tcp_size: Size of each per-CU TCP (vector L1D) cache.
        :param tcp_assoc: Associativity of each TCP cache.
        :param sqc_size: Size of each SQC (instruction) cache.
        :param sqc_assoc: Associativity of each SQC cache.
        :param scalar_size: Size of each scalar cache.
        :param scalar_assoc: Associativity of each scalar cache.
        :param tcc_size: Size of each TCC (GPU L2) bank.
        :param tcc_assoc: Associativity of each TCC bank.
        :param tcc_count: Number of TCC banks; assumed to be a power of
            two (used to compute ``TCC_select_num_bits``).
        :param cu_per_sqc: Number of CUs sharing one SQC/scalar cache;
            must evenly divide the shader's CU count.
        :param num_memory_channels: Number of GPU memory channels;
            assumed to be a power of two (used for address interleaving).
        :param cache_line_size: Cache line size in bytes for this GPU.
        :param shader: The shader whose CUs and DMA ports are wired up.
        """
        super().__init__()

        self._tcp_size = tcp_size
        self._tcp_assoc = tcp_assoc
        self._sqc_size = sqc_size
        self._sqc_assoc = sqc_assoc
        self._scalar_size = scalar_size
        self._scalar_assoc = scalar_assoc
        self._tcc_size = tcc_size
        self._tcc_assoc = tcc_assoc
        self._cache_line_size = cache_line_size

        # We have everything we need to know to create the GPU cache
        # hierarchy immediately. Therefore, an incorporate_cache method is
        # not part of this cache hierarchy. Go ahead and incorporate
        # everything now.
        requires(coherence_protocol_required=CoherenceProtocol.GPU_VIPER)

        self.ruby_gpu = RubySystem()
        self.ruby_gpu.block_size_bytes = cache_line_size

        # Ruby network for this GPU.
        self.ruby_gpu.network = SimpleDoubleCrossbar(self.ruby_gpu)

        # VIPER uses 6 virtual networks.
        self.ruby_gpu.number_of_virtual_networks = 6
        self.ruby_gpu.network.number_of_virtual_networks = 6

        # There is a single local list of all of the controllers to make it
        # easier to connect everything to the GPU network. This can be
        # customized depending on the topology/network requirements.
        self._controllers = []
        self._directory_controllers = []
        self._dma_controllers = []
        self._mem_ctrls = []

        self.clk_domain = SrcClockDomain(
            clock="1801MHz",
            voltage_domain=VoltageDomain(),
        )

        # Variables used by multiple objects are defined once here.
        # math.log2 is exact for powers of two, unlike math.log(x, 2),
        # which may round the result down after int() truncation.
        tcc_bits = int(math.log2(tcc_count))
        deadlock_threshold = 500000

        # Create one TCP per CU.
        compute_units = shader.get_compute_units()
        for idx, cu in enumerate(compute_units):
            tcp = TCPCache(
                tcp_size=self._tcp_size,
                tcp_assoc=self._tcp_assoc,
                network=self.ruby_gpu.network,
                cache_line_size=self._cache_line_size,
            )

            tcp.version = idx

            tcp.sequencer = RubySequencer(
                version=self.seqCount(),
                dcache=tcp.L1cache,
                ruby_system=self.ruby_gpu,
                is_cpu_sequencer=True,
            )

            tcp.coalescer = VIPERCoalescer(
                version=self.seqCount(),
                icache=tcp.L1cache,
                dcache=tcp.L1cache,
                ruby_system=self.ruby_gpu,
                support_inst_reqs=False,
                is_cpu_sequencer=False,
                deadlock_threshold=deadlock_threshold,
                max_coalesces_per_cycle=1,
                gmTokenPort=cu.gmTokenPort,
            )

            # Every lane (wf_size ports) of the CU issues memory requests
            # through the coalescer.
            for port_idx in range(cu.wf_size):
                cu.memory_port[port_idx] = tcp.coalescer.in_ports

            tcp.ruby_system = self.ruby_gpu
            tcp.TCC_select_num_bits = tcc_bits
            tcp.use_seq_not_coal = False
            tcp.issue_latency = 1
            tcp.clk_domain = self.clk_domain
            tcp.recycle_latency = 10
            tcp.WB = False
            tcp.disableL1 = False

            self._controllers.append(tcp)

        # This check ensures there are a same number of CUs with shared SQC
        # and Scalar caches. Raise (rather than assert) so the check still
        # fires when Python runs with assertions disabled (-O).
        num_cus = len(compute_units)
        if num_cus % cu_per_sqc != 0:
            raise ValueError(
                f"Number of CUs ({num_cus}) must be a multiple of "
                f"cu_per_sqc ({cu_per_sqc})"
            )
        num_sqcs = num_cus // cu_per_sqc

        for idx in range(num_sqcs):
            sqc = SQCCache(
                sqc_size=self._sqc_size,
                sqc_assoc=self._sqc_assoc,
                network=self.ruby_gpu.network,
                cache_line_size=self._cache_line_size,
            )

            sqc.version = idx

            sqc.sequencer = RubySequencer(
                version=self.seqCount(),
                dcache=sqc.L1cache,
                ruby_system=self.ruby_gpu,
                support_data_reqs=False,
                is_cpu_sequencer=False,
                deadlock_threshold=deadlock_threshold,
            )

            # SQC is shared across {cu_per_sqc} CUs.
            cu_base = cu_per_sqc * idx
            for cu_num in range(cu_per_sqc):
                cu_id = cu_base + cu_num
                compute_units[cu_id].sqc_port = sqc.sequencer.in_ports

            sqc.ruby_system = self.ruby_gpu
            sqc.TCC_select_num_bits = tcc_bits
            sqc.clk_domain = self.clk_domain
            sqc.recycle_latency = 10

            self._controllers.append(sqc)

        # One scalar cache per SQC; scalar caches reuse the SQC controller
        # type.
        num_scalars = num_sqcs
        for idx in range(num_scalars):
            scalar = SQCCache(
                sqc_size=self._scalar_size,
                sqc_assoc=self._scalar_assoc,
                network=self.ruby_gpu.network,
                cache_line_size=self._cache_line_size,
            )

            # Scalar uses same controller as SQC, so add SQC count to keep
            # controller versions unique.
            scalar.version = idx + num_sqcs

            scalar.sequencer = RubySequencer(
                version=self.seqCount(),
                dcache=scalar.L1cache,
                ruby_system=self.ruby_gpu,
                support_data_reqs=False,
                is_cpu_sequencer=False,
                deadlock_threshold=deadlock_threshold,
            )

            # Scalar cache is shared across {cu_per_sqc} CUs.
            cu_base = cu_per_sqc * idx
            for cu_num in range(cu_per_sqc):
                cu_id = cu_base + cu_num
                compute_units[cu_id].scalar_port = scalar.sequencer.in_ports

            scalar.ruby_system = self.ruby_gpu
            scalar.TCC_select_num_bits = tcc_bits
            scalar.clk_domain = self.clk_domain
            scalar.recycle_latency = 10

            self._controllers.append(scalar)

        # Create TCCs (GPU L2 cache).
        for idx in range(tcc_count):
            tcc = TCCCache(
                tcc_size=self._tcc_size,
                tcc_assoc=self._tcc_assoc,
                network=self.ruby_gpu.network,
                cache_line_size=self._cache_line_size,
            )

            tcc.version = idx

            tcc.ruby_system = self.ruby_gpu
            tcc.WB = False
            tcc.clk_domain = self.clk_domain
            tcc.recycle_latency = 10

            self._controllers.append(tcc)

        # Create DMA controllers, one per shader DMA port.
        for i, port in enumerate(shader.get_gpu_dma_ports()):
            ctrl = ViperGPUDMAController(
                self.ruby_gpu.network, self._cache_line_size
            )
            ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port)

            ctrl.ruby_system = self.ruby_gpu
            ctrl.dma_sequencer.ruby_system = self.ruby_gpu

            self._dma_controllers.append(ctrl)

        # Create GPU memories. Currently fixed to HBM2.
        mem_type_cls = HBM_2000_4H_1x64

        # AMDGPUDevice currently tells the driver there is 16GiB of memory.
        # Until that is a parameter, this needs to be fixed to 16GiB.
        gpu_mem_range = AddrRange(0, size="16GiB")
        intlv_low_bit = int(math.log2(self._cache_line_size))
        intlv_bits = int(math.log2(num_memory_channels))

        for idx in range(num_memory_channels):
            # Each channel covers the whole range, interleaved at
            # cache-line granularity across channels.
            addr_range = AddrRange(
                gpu_mem_range.start,
                size=gpu_mem_range.size(),
                intlvHighBit=intlv_low_bit + intlv_bits - 1,
                intlvBits=intlv_bits,
                intlvMatch=idx,
                xorHighBit=0,
            )

            mem_ctrl = MemCtrl(dram=mem_type_cls(range=addr_range))
            self._mem_ctrls.append(mem_ctrl)

            # "directory" rather than "dir" to avoid shadowing the builtin.
            directory = ViperGPUDirectory(
                self.ruby_gpu.network,
                self._cache_line_size,
                addr_range,
                self._mem_ctrls[idx].port,
            )

            directory.ruby_system = self.ruby_gpu
            directory.TCC_select_num_bits = tcc_bits
            directory.version = len(self._directory_controllers)
            self._directory_controllers.append(directory)

            # Directory-attached L3 cache.
            directory.L3CacheMemory = RubyCache(
                size="16MiB",
                assoc=16,
                atomicALUs=64,
                replacement_policy=TreePLRURP(),
                resourceStalls=False,
                dataArrayBanks=16,
                tagArrayBanks=16,
                dataAccessLatency=20,
                tagAccessLatency=15,
            )

        # Number of sequencers = one per TCP, SQC, and Scalar + one per DMA.
        self.ruby_gpu.num_of_sequencers = len(self._controllers) + len(
            self._dma_controllers
        )

        # Assign the controllers to their parent objects.
        self.ruby_gpu.controllers = self._controllers
        self.ruby_gpu.directory_controllers = self._directory_controllers

        # Parent the DMA controllers as well, mirroring the CPU-side
        # hierarchy; without this assignment they are never attached to
        # the SimObject tree.
        if len(self._dma_controllers) != 0:
            self.ruby_gpu.dma_controllers = self._dma_controllers

        # Connect the controllers using the network topology.
        self.ruby_gpu.network.connect(
            self._controllers
            + self._directory_controllers
            + self._dma_controllers
        )
        self.ruby_gpu.network.setup_buffers()

    def get_mem_ctrls(self):
        """Return the GPU memory controllers (one per memory channel)."""
        return self._mem_ctrls
class SimplePt2Pt(SimpleNetwork):
    """Fully-connected point-to-point topology on SimpleNetwork.

    Every controller gets a dedicated switch, and each ordered pair of
    switches is joined by its own internal link. This does not use garnet.
    """

    def __init__(self, ruby_system):
        super().__init__()
        self.netifs = []

        # TODO: These should be in a base class
        # https://gem5.atlassian.net/browse/GEM5-1039
        self.ruby_system = ruby_system

    def connect(self, controllers):
        """Attach each controller to its own router, then fully mesh the
        routers with directed internal links.
        """
        # One switch per controller; router_id mirrors the controller index.
        self.routers = [
            Switch(router_id=num) for num in range(len(controllers))
        ]

        # External links: each controller hangs off its dedicated router.
        self.ext_links = [
            SimpleExtLink(link_id=num, ext_node=ctrl, int_node=router)
            for num, (ctrl, router) in enumerate(
                zip(controllers, self.routers)
            )
        ]

        # Internal links: one directed link per ordered pair of distinct
        # routers. Link ids start at 1, matching the original counter.
        ordered_pairs = (
            (src, dst)
            for src in self.routers
            for dst in self.routers
            if src != dst  # don't connect a router to itself
        )
        self.int_links = [
            SimpleIntLink(link_id=num, src_node=src, dst_node=dst)
            for num, (src, dst) in enumerate(ordered_pairs, start=1)
        ]
class SimpleDoubleCrossbar(SimpleNetwork):
    """
    GPU network with two crossbars built on SimpleNetwork: one between the
    CU-side caches (TCP/SQC) and the L2s (TCC), and one between the L2s
    and the directories/memory controllers/DMAs. The two crossbars are
    bridged by a pair of directed links.
    """

    def __init__(self, ruby_system):
        super().__init__()
        self.netifs = []

        self.ruby_system = ruby_system

    def connect(self, controllers):
        """Wire each controller to its own router and each router to the
        crossbar matching its controller type.
        """
        # Which controller types hang off which crossbar.
        l2_xbar_types = ("TCP_Controller", "SQC_Controller", "TCC_Controller")
        soc_xbar_types = ("DMA_Controller", "Directory_Controller")

        num_ctrls = len(controllers)

        # One router per controller, plus two extra switches acting as the
        # L2 crossbar and the SoC crossbar respectively.
        self.routers = [
            Switch(router_id=rid) for rid in range(num_ctrls + 2)
        ]
        l2_xbar = self.routers[num_ctrls]
        soc_xbar = self.routers[num_ctrls + 1]

        # Routers 0 .. N-1 each host exactly one controller.
        self.ext_links = [
            SimpleExtLink(
                link_id=num, ext_node=ctrl, int_node=self.routers[num]
            )
            for num, ctrl in enumerate(controllers)
        ]

        links = []

        def add_bidir(node_a, node_b):
            # Append a directed link pair; link ids are assigned
            # sequentially from the current list length.
            links.append(
                SimpleIntLink(
                    link_id=len(links), src_node=node_a, dst_node=node_b
                )
            )
            links.append(
                SimpleIntLink(
                    link_id=len(links), src_node=node_b, dst_node=node_a
                )
            )

        # Connect each controller's router to its crossbar in both
        # directions, in ext_link order.
        for ext_link in self.ext_links:
            ctrl_type = ext_link.ext_node.type
            if ctrl_type in l2_xbar_types:
                add_bidir(ext_link.int_node, l2_xbar)
            elif ctrl_type in soc_xbar_types:
                add_bidir(ext_link.int_node, soc_xbar)

        # Bridge the L2 crossbar and the SoC crossbar.
        add_bidir(l2_xbar, soc_xbar)

        # Finalize network int_links for unproxy
        self.int_links = links