diff --git a/configs/example/gpufs/amd/AmdGPUOptions.py b/configs/example/gpufs/amd/AmdGPUOptions.py
new file mode 100644
index 0000000000..272c417c25
--- /dev/null
+++ b/configs/example/gpufs/amd/AmdGPUOptions.py
@@ -0,0 +1,127 @@
+# Copyright (c) 2021 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+def addAmdGPUOptions(parser):
+    """Register the AMD GPU model options on an argparse parser.
+
+    Args:
+        parser: argparse.ArgumentParser (or argument group) to extend.
+    """
+    parser.add_argument("-u", "--num-compute-units", type=int, default=4,
+                        help="number of GPU compute units")
+    parser.add_argument("--num-cp", type=int, default=0,
+                        help="Number of GPU Command Processors (CP)")
+    # Knobs, to avoid hard-coding the number 4 everywhere.
+    parser.add_argument("--cu-per-sqc", type=int, default=4,
+                        help="number of CUs sharing an SQC"
+                        " (icache, and thus icache TLB)")
+    parser.add_argument("--cu-per-scalar-cache", type=int, default=4,
+                        help="Number of CUs sharing a scalar cache")
+    parser.add_argument("--simds-per-cu", type=int, default=4,
+                        help="SIMD units per CU")
+    parser.add_argument("--cu-per-sa", type=int, default=4,
+                        help="Number of CUs per shader array. This must be a"
+                        " multiple of --cu-per-sqc and --cu-per-scalar-cache")
+    parser.add_argument("--sa-per-complex", type=int, default=1,
+                        help="Number of shader arrays per complex")
+    parser.add_argument("--num-gpu-complexes", type=int, default=1,
+                        help="Number of GPU complexes")
+    parser.add_argument("--wf-size", type=int, default=64,
+                        help="Wavefront size(in workitems)")
+    parser.add_argument("--sp-bypass-path-length", type=int, default=4,
+                        help="Number of stages of bypass path in vector ALU "
+                        "for Single Precision ops")
+    parser.add_argument("--dp-bypass-path-length", type=int, default=4,
+                        help="Number of stages of bypass path in vector ALU "
+                        "for Double Precision ops")
+    # Issue period per SIMD unit: cycles before issuing another vector
+    parser.add_argument("--issue-period", type=int, default=4,
+                        help="Number of cycles per vector instruction issue"
+                        " period")
+    parser.add_argument("--glbmem-wr-bus-width", type=int, default=32,
+                        help="VGPR to Coalescer (Global Memory) data bus width"
+                        " in bytes")
+    parser.add_argument("--glbmem-rd-bus-width", type=int, default=32,
+                        help="Coalescer to VGPR (Global Memory) data bus width"
+                        " in bytes")
+    # Currently only 1 local and 1 global memory pipe are supported
+    parser.add_argument("--shr-mem-pipes-per-cu", type=int, default=1,
+                        help="Number of Shared Memory pipelines per CU")
+    parser.add_argument("--glb-mem-pipes-per-cu", type=int, default=1,
+                        help="Number of Global Memory pipelines per CU")
+    parser.add_argument("--wfs-per-simd", type=int, default=10,
+                        help="Number of WF slots per SIMD")
+
+    parser.add_argument("--registerManagerPolicy", type=str,
+                        default="static", help="Register manager policy")
+    parser.add_argument("--vreg-file-size", type=int, default=2048,
+                        help="number of physical vector registers per SIMD")
+    parser.add_argument("--vreg-min-alloc", type=int, default=4,
+                        help="vector register reservation unit")
+    parser.add_argument("--sreg-file-size", type=int, default=2048,
+                        help="number of physical scalar registers per SIMD")
+    parser.add_argument("--sreg-min-alloc", type=int, default=4,
+                        help="scalar register reservation unit")
+
+    parser.add_argument("--bw-scalor", type=int, default=0,
+                        help="bandwidth scalor for scalability analysis")
+    parser.add_argument("--CPUClock", type=str, default="2GHz",
+                        help="CPU clock")
+    parser.add_argument("--gpu-clock", type=str, default="1GHz",
+                        help="GPU clock")
+    parser.add_argument("--cpu-voltage", action="store", type=str,
+                        default='1.0V', help="CPU voltage domain")
+    parser.add_argument("--gpu-voltage", action="store", type=str,
+                        default='1.0V', help="GPU voltage domain")
+    parser.add_argument("--CUExecPolicy", type=str, default="OLDEST-FIRST",
+                        help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)")
+    parser.add_argument("--LocalMemBarrier", action="store_true",
+                        help="Barrier does not wait for writethroughs to "
+                        "complete")
+    parser.add_argument("--countPages", action="store_true",
+                        help="Count Page Accesses and output in "
+                        "per-CU output files")
+    parser.add_argument("--TLB-prefetch", type=int, help="prefetch depth for "
+                        "TLBs")
+    parser.add_argument("--pf-type", type=str, help="type of prefetch: "
+                        "PF_CU, PF_WF, PF_PHASE, PF_STRIDE")
+    parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
+    parser.add_argument("--numLdsBanks", type=int, default=32,
+                        help="number of physical banks per LDS module")
+    parser.add_argument("--ldsBankConflictPenalty", type=int, default=1,
+                        help="number of cycles per LDS bank conflict")
+    parser.add_argument("--lds-size", type=int, default=65536,
+                        help="Size of the LDS in bytes")
+    parser.add_argument("--num-hw-queues", type=int, default=10,
+                        help="number of hw queues in packet processor")
+    parser.add_argument("--reg-alloc-policy", type=str, default="simple",
+                        help="register allocation policy (simple/dynamic)")
+
diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py
new file mode 100644
index 0000000000..2e606331fb
--- /dev/null
+++ b/configs/example/gpufs/runfs.py
@@ -0,0 +1,115 @@
+# Copyright (c) 2021 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+# System includes
+import argparse
+import math
+
+# gem5 related
+import m5
+from m5.objects import *
+from m5.util import addToPath
+
+# gem5 options and objects
+addToPath('../../')
+from ruby import Ruby
+from common import Simulation
+from common import ObjectList
+from common import Options
+from common import GPUTLBOptions
+from common import GPUTLBConfig
+from amd import AmdGPUOptions
+
+# GPU FS related
+from system.system import makeGpuFSSystem
+
+
+def addRunFSOptions(parser):
+    """Add the options specific to running gpufs to an argparse parser."""
+    add = parser.add_argument
+    add("--script", default=None,
+        help="Script to execute in the simulated system")
+    add("--host-parallel", action="store_true", default=False,
+        help="Run multiple host threads in KVM mode")
+    add("--disk-image", default="",
+        help="The boot disk image to mount (/dev/sda)")
+    add("--second-disk", default=None,
+        help="The second disk image to mount (/dev/sdb)")
+    add("--kernel", default=None, help="Linux kernel to boot")
+    add("--gpu-rom", default=None, help="GPU BIOS to load")
+    add("--gpu-mmio-trace", default=None, help="GPU MMIO trace to load")
+
+
+def runGpuFSSystem(args):
+    """Build the GPU full-system model described by args and simulate it.
+
+    This function can be called by higher level scripts designed to simulate
+    specific devices, which typically hard code some args that should not be
+    changed by the user.
+
+    Args:
+        args: Parsed options. num_sqc and num_scalar_cache are overwritten
+            here, derived from num_compute_units.
+    """
+    # These are used by the protocols. They should not be set by the user.
+    n_cus = float(args.num_compute_units)
+    args.num_sqc = int(math.ceil(n_cus / args.cu_per_sqc))
+    args.num_scalar_cache = int(math.ceil(n_cus / args.cu_per_scalar_cache))
+
+    system = makeGpuFSSystem(args)
+
+    # root is not referenced again in this function.
+    root = Root(full_system = True, system = system,
+                time_sync_enable = True, time_sync_period = '1000us')
+
+    if args.script is not None:
+        system.readfile = args.script
+
+    m5.instantiate()
+
+    print("Running the simulation")
+    exit_event = m5.simulate(args.abs_max_tick)
+    tick, cause = m5.curTick(), exit_event.getCause()
+    print('Exiting @ tick %i because %s' % (tick, cause))
+
+
+if __name__ == "__m5_main__":
+    parser = argparse.ArgumentParser()
+    # Add gpufs, common, ruby, amdgpu, and gpu tlb args, in that order
+    for add_options in (addRunFSOptions,
+                        Options.addCommonOptions,
+                        Ruby.define_options,
+                        AmdGPUOptions.addAmdGPUOptions,
+                        GPUTLBOptions.tlb_options):
+        add_options(parser)
+
+    args = parser.parse_args()
+    runGpuFSSystem(args)
diff --git a/configs/example/gpufs/system/amdgpu.py b/configs/example/gpufs/system/amdgpu.py
new file mode 100644
index 0000000000..b6bf8211c0
--- /dev/null
+++ b/configs/example/gpufs/system/amdgpu.py
@@ -0,0 +1,150 @@
+# Copyright (c) 2021 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+import m5
+from m5.objects import *
+
+def createGPU(system, args):
+    """Build the Shader and its compute units from the parsed options.
+
+    Each compute unit gets its wavefront slots, vector/scalar register files
+    with their pool managers, a register manager and an LDS whose cuPort is
+    connected to the CU's ldsPort through ldsBus.
+
+    Args:
+        system: Supplies clk_domain and cpu[0]; becomes the shader's parent.
+        args: Parsed command line options.
+
+    Returns:
+        The Shader, with all compute units attached through shader.CUs.
+
+    Raises:
+        ValueError: If args.reg_alloc_policy is not "simple" or "dynamic".
+    """
+    # Fail early on an unknown policy instead of building empty pool lists.
+    if args.reg_alloc_policy == "simple":
+        pool_mgr_cls = SimplePoolManager
+    elif args.reg_alloc_policy == "dynamic":
+        pool_mgr_cls = DynPoolManager
+    else:
+        raise ValueError("Unknown register allocation policy '%s' (expected "
+                         "simple or dynamic)" % args.reg_alloc_policy)
+
+    shader = Shader(n_wf = args.wfs_per_simd,
+                    timing = True,
+                    clk_domain = system.clk_domain)
+
+    # VIPER GPU protocol implements release consistency at GPU side. So,
+    # we make their writes visible to the global memory and should read
+    # from global memory during kernel boundary. The pipeline initiates
+    # (or does not initiate) the acquire/release operation depending on the
+    # impl_kern_launch_acq and impl_kern_end_rel flags; True means it does.
+    # VIPER is write-through based, so only impl_kern_launch_acq is set.
+    shader.impl_kern_launch_acq = True
+    shader.impl_kern_end_rel = False
+
+    # Per-lane TLB is off unless explicitly selected
+    per_lane = args.TLB_config == "perLane"
+
+    # List of compute units; one GPU can have multiple compute units
+    compute_units = []
+    for i in range(args.num_compute_units):
+        lds = LdsState(banks = args.numLdsBanks,
+                       bankConflictPenalty = args.ldsBankConflictPenalty,
+                       size = args.lds_size)
+        cu = ComputeUnit(cu_id = i, perLaneTLB = per_lane,
+                         num_SIMDs = args.simds_per_cu,
+                         wf_size = args.wf_size,
+                         spbypass_pipe_length = args.sp_bypass_path_length,
+                         dpbypass_pipe_length = args.dp_bypass_path_length,
+                         issue_period = args.issue_period,
+                         coalescer_to_vrf_bus_width = args.glbmem_rd_bus_width,
+                         vrf_to_coalescer_bus_width = args.glbmem_wr_bus_width,
+                         num_global_mem_pipes = args.glb_mem_pipes_per_cu,
+                         num_shared_mem_pipes = args.shr_mem_pipes_per_cu,
+                         n_wf = args.wfs_per_simd,
+                         execPolicy = args.CUExecPolicy,
+                         localMemBarrier = args.LocalMemBarrier,
+                         countPages = args.countPages,
+                         localDataStore = lds)
+        compute_units.append(cu)
+
+        wavefronts = []
+        vrfs = []
+        vrf_pool_mgrs = []
+        srfs = []
+        srf_pool_mgrs = []
+        for j in range(args.simds_per_cu):
+            for k in range(shader.n_wf):
+                wavefronts.append(Wavefront(simdId = j, wf_slot_id = k,
+                                            wf_size = args.wf_size))
+
+            vrf_pool_mgrs.append(
+                pool_mgr_cls(pool_size = args.vreg_file_size,
+                             min_alloc = args.vreg_min_alloc))
+            # Scalar pools use the scalar reservation unit, not the vector one
+            srf_pool_mgrs.append(
+                pool_mgr_cls(pool_size = args.sreg_file_size,
+                             min_alloc = args.sreg_min_alloc))
+
+            vrfs.append(VectorRegisterFile(simd_id=j, wf_size=args.wf_size,
+                                           num_regs=args.vreg_file_size))
+            srfs.append(ScalarRegisterFile(simd_id=j, wf_size=args.wf_size,
+                                           num_regs=args.sreg_file_size))
+
+        cu.wavefronts = wavefronts
+        cu.vector_register_file = vrfs
+        cu.scalar_register_file = srfs
+        cu.register_manager = \
+            RegisterManager(policy=args.registerManagerPolicy,
+                            vrf_pool_managers=vrf_pool_mgrs,
+                            srf_pool_managers=srf_pool_mgrs)
+        if args.TLB_prefetch:
+            cu.prefetch_depth = args.TLB_prefetch
+            cu.prefetch_prev_type = args.pf_type
+
+        # Attach the LDS and the CU to the bus (actually a Bridge)
+        cu.ldsPort = cu.ldsBus.cpu_side_port
+        cu.ldsBus.mem_side_port = cu.localDataStore.cuPort
+
+    # Attach compute units to GPU
+    shader.CUs = compute_units
+    shader.cpu_pointer = system.cpu[0]
+    shader.eventq_index = 0
+    shader.set_parent(system, "Shader")
+
+    return shader
+
+def connectGPU(system, args):
+    """Attach an AMDGPUDevice to the south bridge; set its trace and ROM."""
+    bridge = system.pc.south_bridge
+    bridge.gpu = gpu = AMDGPUDevice(pci_func=0, pci_dev=8, pci_bus=0)
+    gpu.trace_file, gpu.rom_binary = args.gpu_mmio_trace, args.gpu_rom
diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py
new file mode 100644
index 0000000000..be4dd570ab
--- /dev/null
+++ b/configs/example/gpufs/system/system.py
@@ -0,0 +1,169 @@
+# Copyright (c) 2021 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# For use for simulation and test purposes only
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+from system.amdgpu import *
+
+from m5.util import panic
+
+from common.Benchmarks import *
+from common.FSConfig import *
+from common import Simulation
+from ruby import Ruby
+
+def makeGpuFSSystem(args):
+    """Build and return a Linux X86 system with an AMD GPU, wired via Ruby.
+
+    args are the parsed options; num_sqc and num_scalar_cache must already
+    be set. The shader is appended to system.cpu after the CPU cores.
+    """
+    # Boot options are standard gem5 options plus:
+    # - Framebuffer device emulation 0 to reduce driver code paths.
+    # - Blacklist amdgpu as it cannot (currently) load in KVM CPU.
+    # - Blacklist psmouse as amdgpu driver adds proprietary commands that
+    #   cause gem5 to panic.
+    boot_options = ['earlyprintk=ttyS0', 'console=ttyS0,9600',
+                    'lpj=7999923', 'root=/dev/sda1',
+                    'drm_kms_helper.fbdev_emulation=0',
+                    'modprobe.blacklist=amdgpu',
+                    'modprobe.blacklist=psmouse']
+    cmdline = ' '.join(boot_options)
+
+    if MemorySize(args.mem_size) < MemorySize('2GB'):
+        panic("Need at least 2GB of system memory to load amdgpu module")
+    if args.kernel is None:
+        panic("Need a Linux kernel to boot; specify one with --kernel")
+
+    # Use the common FSConfig to setup a Linux X86 System
+    (TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
+    bm = SysConfig(disks=[args.disk_image], mem=args.mem_size)
+    system = makeLinuxX86System(test_mem_mode, args.num_cpus, bm, True,
+                                  cmdline=cmdline)
+    system.workload.object_file = binary(args.kernel)
+
+    # Set the cache line size for the entire system.
+    system.cache_line_size = args.cacheline_size
+
+    # Create a top-level voltage and clock domain.
+    system.voltage_domain = VoltageDomain(voltage = args.sys_voltage)
+    system.clk_domain = SrcClockDomain(clock =  args.sys_clock,
+            voltage_domain = system.voltage_domain)
+
+    # Create a CPU voltage and clock domain.
+    system.cpu_voltage_domain = VoltageDomain()
+    system.cpu_clk_domain = SrcClockDomain(clock = args.cpu_clock,
+            voltage_domain = system.cpu_voltage_domain)
+
+    # Create specified number of CPUs. GPUFS really only needs one.
+    system.cpu = [TestCPUClass(clk_domain=system.cpu_clk_domain, cpu_id=i)
+                    for i in range(args.num_cpus)]
+
+    if ObjectList.is_kvm_cpu(TestCPUClass) or \
+        ObjectList.is_kvm_cpu(FutureClass):
+        system.kvm_vm = KvmVM()
+
+    # Create AMDGPU and attach to southbridge
+    shader = createGPU(system, args)
+    connectGPU(system, args)
+
+    # This arbitrary address is something in the X86 I/O hole
+    hsapp_gpu_map_paddr = 0xe00000000
+    gpu_hsapp = HSAPacketProcessor(pioAddr=hsapp_gpu_map_paddr,
+                                   numHWQueues=args.num_hw_queues)
+    dispatcher = GPUDispatcher()
+    gpu_cmd_proc = GPUCommandProcessor(hsapp=gpu_hsapp,
+                                       dispatcher=dispatcher)
+    shader.dispatcher = dispatcher
+    shader.gpu_cmd_proc = gpu_cmd_proc
+
+    # GPU, HSAPP, and GPUCommandProc are DMA devices
+    system._dma_ports.append(gpu_hsapp)
+    system._dma_ports.append(gpu_cmd_proc)
+    system._dma_ports.append(system.pc.south_bridge.gpu)
+
+    gpu_hsapp.pio = system.iobus.mem_side_ports
+    gpu_cmd_proc.pio = system.iobus.mem_side_ports
+    system.pc.south_bridge.gpu.pio = system.iobus.mem_side_ports
+
+    # Create Ruby system using Ruby.py for now
+    Ruby.create_system(args, True, system, system.iobus,
+                      system._dma_ports)
+
+    # Create a separate clock domain for Ruby
+    system.ruby.clk_domain = SrcClockDomain(clock = args.ruby_clock,
+                                   voltage_domain = system.voltage_domain)
+
+    for (i, cpu) in enumerate(system.cpu):
+        # Tie the cpu ports to the correct ruby system ports
+        cpu.clk_domain = system.cpu_clk_domain
+        cpu.createThreads()
+        cpu.createInterruptController()
+
+        system.ruby._cpu_ports[i].connectCpuPorts(cpu)
+
+    # The shader core will be whatever is after the CPU cores are accounted for
+    shader_idx = args.num_cpus
+    system.cpu.append(shader)
+
+    gpu_port_idx = len(system.ruby._cpu_ports) - args.num_compute_units \
+                   - args.num_sqc - args.num_scalar_cache - args.num_cp * 2
+
+    # Connect token ports. Only TCP coalescers use a token port (for back
+    # pressure) and they are not necessarily first in the sequencer list.
+    token_port_idx = 0
+    for port in system.ruby._cpu_ports:
+        if isinstance(port, VIPERCoalescer):
+            system.cpu[shader_idx].CUs[token_port_idx].gmTokenPort = \
+                port.gmTokenPort
+            token_port_idx += 1
+
+    for i in range(args.num_compute_units):
+        # The pipeline issues wf_size number of uncoalesced requests
+        # in one GPU issue cycle. Hence wf_size mem ports.
+        for j in range(args.wf_size):
+            system.cpu[shader_idx].CUs[i].memory_port[j] = \
+                      system.ruby._cpu_ports[gpu_port_idx].in_ports[j]
+        gpu_port_idx += 1
+
+    for i in range(args.num_compute_units):
+        if i > 0 and not i % args.cu_per_sqc:
+            gpu_port_idx += 1
+        system.cpu[shader_idx].CUs[i].sqc_port = \
+                system.ruby._cpu_ports[gpu_port_idx].in_ports
+    gpu_port_idx = gpu_port_idx + 1
+
+    for i in range(args.num_compute_units):
+        if i > 0 and not i % args.cu_per_scalar_cache:
+            gpu_port_idx += 1
+        system.cpu[shader_idx].CUs[i].scalar_port = \
+            system.ruby._cpu_ports[gpu_port_idx].in_ports
+    gpu_port_idx = gpu_port_idx + 1
+
+    return system