tests,configs,mem-ruby: Adding Ruby tester for GPU_VIPER
This patch adds the GPU protocol tester that uses data-race-free operation to discover bugs in GPU protocols including GPU_VIPER. For more information please see the following paper and the README: T. Ta, X. Zhang, A. Gutierrez and B. M. Beckmann, "Autonomous Data-Race-Free GPU Testing," 2019 IEEE International Symposium on Workload Characterization (IISWC), Orlando, FL, USA, 2019, pp. 81-92, doi: 10.1109/IISWC47752.2019.9042019. Change-Id: Ic9939d131a930d1e7014ed0290601140bdd1499f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/32855 Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
|
||||
# Copyright (c) 2018-2020 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# For use for simulation and test purposes only
|
||||
@@ -43,145 +43,272 @@ addToPath('../')
|
||||
from common import Options
|
||||
from ruby import Ruby
|
||||
|
||||
# Get paths we might need.
|
||||
config_path = os.path.dirname(os.path.abspath(__file__))
|
||||
config_root = os.path.dirname(config_path)
|
||||
m5_root = os.path.dirname(config_root)
|
||||
|
||||
parser = optparse.OptionParser()
|
||||
Options.addNoISAOptions(parser)
|
||||
|
||||
parser.add_option("--maxloads", metavar="N", default=100,
|
||||
help="Stop after N loads")
|
||||
parser.add_option("-f", "--wakeup_freq", metavar="N", default=10,
|
||||
help="Wakeup every N cycles")
|
||||
parser.add_option("-u", "--num-compute-units", type="int", default=1,
|
||||
help="number of compute units in the GPU")
|
||||
parser.add_option("--num-cp", type="int", default=0,
|
||||
help="Number of GPU Command Processors (CP)")
|
||||
# not super important now, but to avoid putting the number 4 everywhere, make
|
||||
# it an option/knob
|
||||
parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs \
|
||||
sharing an SQC (icache, and thus icache TLB)")
|
||||
parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
|
||||
"per CU")
|
||||
parser.add_option("--wf-size", type="int", default=64,
|
||||
help="Wavefront size(in workitems)")
|
||||
parser.add_option("--wfs-per-simd", type="int", default=10, help="Number of " \
|
||||
"WF slots per SIMD")
|
||||
|
||||
#
|
||||
# Add the ruby specific and protocol specific options
|
||||
#
|
||||
parser = optparse.OptionParser()
|
||||
Options.addNoISAOptions(parser)
|
||||
Ruby.define_options(parser)
|
||||
|
||||
exec(compile( \
|
||||
open(os.path.join(config_root, "common", "Options.py")).read(), \
|
||||
os.path.join(config_root, "common", "Options.py"), 'exec'))
|
||||
# GPU Ruby tester options
|
||||
parser.add_option("--cache-size", type="choice", default="small",
|
||||
choices=["small", "large"],
|
||||
help="Cache sizes to use. Small encourages races between \
|
||||
requests and writebacks. Large stresses write-through \
|
||||
and/or write-back GPU caches.")
|
||||
parser.add_option("--system-size", type="choice", default="small",
|
||||
choices=["small", "medium", "large"],
|
||||
help="This option defines how many CUs, CPUs and cache \
|
||||
components in the test system.")
|
||||
parser.add_option("--address-range", type="choice", default="small",
|
||||
choices=["small", "large"],
|
||||
help="This option defines the number of atomic \
|
||||
locations that affects the working set's size. \
|
||||
A small number of atomic locations encourage more \
|
||||
races among threads. The large option stresses cache \
|
||||
resources.")
|
||||
parser.add_option("--episode-length", type="choice", default="short",
|
||||
choices=["short", "medium", "long"],
|
||||
help="This option defines the number of LDs and \
|
||||
STs in an episode. The small option encourages races \
|
||||
between the start and end of an episode. The long \
|
||||
option encourages races between LDs and STs in the \
|
||||
same episode.")
|
||||
parser.add_option("--test-length", type="int", default=1,
|
||||
help="The number of episodes to be executed by each \
|
||||
wavefront. This determines the maximum number, i.e., \
|
||||
val X #WFs, of episodes to be executed in the test.")
|
||||
parser.add_option("--debug-tester", action='store_true',
|
||||
help="This option will turn on DRF checker")
|
||||
parser.add_option("--random-seed", type="int", default=0,
|
||||
help="Random seed number. Default value (i.e., 0) means \
|
||||
using runtime-specific value")
|
||||
parser.add_option("--log-file", type="string", default="gpu-ruby-test.log")
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
#
|
||||
# Set the default cache size and associativity to be very small to encourage
|
||||
# races between requests and writebacks.
|
||||
#
|
||||
options.l1d_size="256B"
|
||||
options.l1i_size="256B"
|
||||
options.l2_size="512B"
|
||||
options.l3_size="1kB"
|
||||
options.l1d_assoc=2
|
||||
options.l1i_assoc=2
|
||||
options.l2_assoc=2
|
||||
options.l3_assoc=2
|
||||
|
||||
# This file can support multiple compute units
|
||||
assert(options.num_compute_units >= 1)
|
||||
n_cu = options.num_compute_units
|
||||
|
||||
options.num_sqc = int((n_cu + options.cu_per_sqc - 1) // options.cu_per_sqc)
|
||||
|
||||
if args:
|
||||
print("Error: script doesn't take any positional arguments")
|
||||
sys.exit(1)
|
||||
|
||||
#
|
||||
# Create the ruby random tester
|
||||
# Set up cache size - 2 options
|
||||
# 0: small cache
|
||||
# 1: large cache
|
||||
#
|
||||
|
||||
# Check to for the GPU_RfO protocol. Other GPU protocols are non-SC and will
|
||||
# not work with the Ruby random tester.
|
||||
assert(buildEnv['PROTOCOL'] == 'GPU_RfO')
|
||||
|
||||
# The GPU_RfO protocol does not support cache flushes
|
||||
check_flush = False
|
||||
|
||||
tester = RubyTester(check_flush=check_flush,
|
||||
checks_to_complete=options.maxloads,
|
||||
wakeup_frequency=options.wakeup_freq,
|
||||
deadlock_threshold=1000000)
|
||||
if (options.cache_size == "small"):
|
||||
options.tcp_size="256B"
|
||||
options.tcp_assoc=2
|
||||
options.tcc_size="1kB"
|
||||
options.tcc_assoc=2
|
||||
elif (options.cache_size == "large"):
|
||||
options.tcp_size="256kB"
|
||||
options.tcp_assoc=16
|
||||
options.tcc_size="1024kB"
|
||||
options.tcc_assoc=16
|
||||
|
||||
#
|
||||
# Create the M5 system. Note that the Memory Object isn't
|
||||
# actually used by the rubytester, but is included to support the
|
||||
# M5 memory size == Ruby memory size checks
|
||||
# Set up system size - 3 options
|
||||
#
|
||||
system = System(cpu=tester, mem_ranges=[AddrRange(options.mem_size)])
|
||||
if (options.system_size == "small"):
|
||||
# 1 CU, 1 CPU, 1 SQC, 1 Scalar
|
||||
options.wf_size = 1
|
||||
options.wavefronts_per_cu = 1
|
||||
options.num_cpus = 1
|
||||
options.cu_per_sqc = 1
|
||||
options.cu_per_scalar_cache = 1
|
||||
options.num_compute_units = 1
|
||||
elif (options.system_size == "medium"):
|
||||
# 4 CUs, 4 CPUs, 1 SQCs, 1 Scalars
|
||||
options.wf_size = 16
|
||||
options.wavefronts_per_cu = 4
|
||||
options.num_cpus = 4
|
||||
options.cu_per_sqc = 4
|
||||
options.cu_per_scalar_cache = 4
|
||||
options.num_compute_units = 4
|
||||
elif (options.system_size == "large"):
|
||||
# 8 CUs, 4 CPUs, 1 SQCs, 1 Scalars
|
||||
options.wf_size = 32
|
||||
options.wavefronts_per_cu = 4
|
||||
options.num_cpus = 4
|
||||
options.cu_per_sqc = 4
|
||||
options.cu_per_scalar_cache = 4
|
||||
options.num_compute_units = 8
|
||||
|
||||
# Create a top-level voltage domain and clock domain
|
||||
system.voltage_domain = VoltageDomain(voltage=options.sys_voltage)
|
||||
#
|
||||
# Set address range - 2 options
|
||||
# level 0: small
|
||||
# level 1: large
|
||||
# Each location corresponds to a 4-byte piece of data
|
||||
#
|
||||
options.mem_size = '1024MB'
|
||||
if (options.address_range == "small"):
|
||||
num_atomic_locs = 10
|
||||
num_regular_locs_per_atomic_loc = 10000
|
||||
elif (options.address_range == "large"):
|
||||
num_atomic_locs = 100
|
||||
num_regular_locs_per_atomic_loc = 100000
|
||||
|
||||
system.clk_domain = SrcClockDomain(clock=options.sys_clock,
|
||||
voltage_domain=system.voltage_domain)
|
||||
#
|
||||
# Set episode length (# of actions per episode) - 3 options
|
||||
# 0: 10 actions
|
||||
# 1: 100 actions
|
||||
# 2: 500 actions
|
||||
#
|
||||
if (options.episode_length == "short"):
|
||||
eps_length = 10
|
||||
elif (options.episode_length == "medium"):
|
||||
eps_length = 100
|
||||
elif (options.episode_length == "long"):
|
||||
eps_length = 500
|
||||
|
||||
#
|
||||
# Set Ruby and tester deadlock thresholds. Ruby's deadlock detection is the
|
||||
# primary check for deadlocks. The tester's deadlock threshold detection is
|
||||
# a secondary check for deadlock. If there is a bug in RubyPort that causes
|
||||
# a packet not to return to the tester properly, the tester will issue a
|
||||
# deadlock panic. We set cache_deadlock_threshold < tester_deadlock_threshold
|
||||
# to detect deadlock caused by Ruby protocol first before one caused by the
|
||||
# coalescer. Both units are in Ticks
|
||||
#
|
||||
options.cache_deadlock_threshold = 1e8
|
||||
tester_deadlock_threshold = 1e9
|
||||
|
||||
# For now we're testing only GPU protocol, so we force num_cpus to be 0
|
||||
options.num_cpus = 0
|
||||
|
||||
# Number of CUs
|
||||
n_CUs = options.num_compute_units
|
||||
|
||||
# Set test length, i.e., number of episodes per wavefront * #WFs.
|
||||
# Test length can be 1x#WFs, 10x#WFs, 100x#WFs, ...
|
||||
n_WFs = n_CUs * options.wavefronts_per_cu
|
||||
max_episodes = options.test_length * n_WFs
|
||||
|
||||
# Number of SQC and Scalar caches
|
||||
assert(n_CUs % options.cu_per_sqc == 0)
|
||||
n_SQCs = n_CUs // options.cu_per_sqc
|
||||
options.num_sqc = n_SQCs
|
||||
|
||||
assert(options.cu_per_scalar_cache != 0)
|
||||
n_Scalars = n_CUs // options.cu_per_scalar_cache
|
||||
options.num_scalar_cache = n_Scalars
|
||||
|
||||
#
|
||||
# Create GPU Ruby random tester
|
||||
#
|
||||
tester = ProtocolTester(cus_per_sqc = options.cu_per_sqc,
|
||||
cus_per_scalar = options.cu_per_scalar_cache,
|
||||
wavefronts_per_cu = options.wavefronts_per_cu,
|
||||
workitems_per_wavefront = options.wf_size,
|
||||
num_atomic_locations = num_atomic_locs,
|
||||
num_normal_locs_per_atomic = \
|
||||
num_regular_locs_per_atomic_loc,
|
||||
max_num_episodes = max_episodes,
|
||||
episode_length = eps_length,
|
||||
debug_tester = options.debug_tester,
|
||||
random_seed = options.random_seed,
|
||||
log_file = options.log_file)
|
||||
|
||||
#
|
||||
# Create a gem5 system. Note that the memory object isn't actually used by the
|
||||
# tester, but is included to ensure the gem5 memory size == Ruby memory size
|
||||
# checks. The system doesn't have real CPUs or CUs. It just has a tester that
|
||||
# has physical ports to be connected to Ruby
|
||||
#
|
||||
system = System(cpu = tester,
|
||||
mem_ranges = [AddrRange(options.mem_size)],
|
||||
cache_line_size = options.cacheline_size,
|
||||
mem_mode = 'timing')
|
||||
|
||||
system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
|
||||
system.clk_domain = SrcClockDomain(clock = options.sys_clock,
|
||||
voltage_domain = system.voltage_domain)
|
||||
|
||||
#
|
||||
# Command processor is not needed for the tester since we don't run real
|
||||
# kernels. Setting it to zero disables the VIPER protocol from creating
|
||||
# a command processor and its caches.
|
||||
#
|
||||
options.num_cp = 0
|
||||
|
||||
#
|
||||
# Create the Ruby system
|
||||
#
|
||||
Ruby.create_system(options, False, system)
|
||||
|
||||
# Create a seperate clock domain for Ruby
|
||||
system.ruby.clk_domain = SrcClockDomain(clock=options.ruby_clock,
|
||||
voltage_domain=system.voltage_domain)
|
||||
|
||||
tester.num_cpus = len(system.ruby._cpu_ports)
|
||||
|
||||
#
|
||||
# The tester is most effective when randomization is turned on and
|
||||
# artifical delay is randomly inserted on messages
|
||||
#
|
||||
system.ruby.randomization = True
|
||||
|
||||
for ruby_port in system.ruby._cpu_ports:
|
||||
# Assert that we got the right number of Ruby ports
|
||||
assert(len(system.ruby._cpu_ports) == n_CUs + n_SQCs + n_Scalars)
|
||||
|
||||
#
|
||||
# Tie the ruby tester ports to the ruby cpu read and write ports
|
||||
#
|
||||
if ruby_port.support_data_reqs and ruby_port.support_inst_reqs:
|
||||
tester.cpuInstDataPort = ruby_port.slave
|
||||
elif ruby_port.support_data_reqs:
|
||||
tester.cpuDataPort = ruby_port.slave
|
||||
elif ruby_port.support_inst_reqs:
|
||||
tester.cpuInstPort = ruby_port.slave
|
||||
|
||||
# Do not automatically retry stalled Ruby requests
|
||||
#
|
||||
# Attach Ruby ports to the tester in the order:
|
||||
# cpu_sequencers,
|
||||
# vector_coalescers,
|
||||
# sqc_sequencers,
|
||||
# scalar_sequencers
|
||||
#
|
||||
# Note that this requires the protocol to create sequencers in this order
|
||||
#
|
||||
print("Attaching ruby ports to the tester")
|
||||
for i, ruby_port in enumerate(system.ruby._cpu_ports):
|
||||
ruby_port.no_retry_on_stall = True
|
||||
|
||||
#
|
||||
# Tell each sequencer this is the ruby tester so that it
|
||||
# copies the subblock back to the checker
|
||||
#
|
||||
ruby_port.using_ruby_tester = True
|
||||
|
||||
# -----------------------
|
||||
# run simulation
|
||||
# -----------------------
|
||||
if i < n_CUs:
|
||||
tester.cu_vector_ports = ruby_port.in_ports
|
||||
tester.cu_token_ports = ruby_port.gmTokenPort
|
||||
tester.max_cu_tokens = 4*n_WFs
|
||||
elif i < (n_CUs + n_SQCs):
|
||||
tester.cu_sqc_ports = ruby_port.in_ports
|
||||
else:
|
||||
tester.cu_scalar_ports = ruby_port.in_ports
|
||||
|
||||
root = Root( full_system = False, system = system )
|
||||
root.system.mem_mode = 'timing'
|
||||
i += 1
|
||||
|
||||
#
|
||||
# No CPU threads are needed for GPU tester
|
||||
#
|
||||
tester.cpu_threads = []
|
||||
|
||||
#
|
||||
# Create GPU wavefronts
|
||||
#
|
||||
thread_clock = SrcClockDomain(clock = '1GHz',
|
||||
voltage_domain = system.voltage_domain)
|
||||
wavefronts = []
|
||||
g_thread_idx = 0
|
||||
print("Creating %i WFs attached to %i CUs" % \
|
||||
(n_CUs * tester.wavefronts_per_cu, n_CUs))
|
||||
for cu_idx in range(n_CUs):
|
||||
for wf_idx in range(tester.wavefronts_per_cu):
|
||||
wavefronts.append(GpuWavefront(thread_id = g_thread_idx,
|
||||
cu_id = cu_idx,
|
||||
num_lanes = options.wf_size,
|
||||
clk_domain = thread_clock,
|
||||
deadlock_threshold = \
|
||||
tester_deadlock_threshold))
|
||||
g_thread_idx += 1
|
||||
tester.wavefronts = wavefronts
|
||||
|
||||
#
|
||||
# Run simulation
|
||||
#
|
||||
root = Root(full_system = False, system = system)
|
||||
|
||||
# Not much point in this being higher than the L1 latency
|
||||
m5.ticks.setGlobalFrequency('1ns')
|
||||
|
||||
# instantiate configuration
|
||||
# Instantiate configuration
|
||||
m5.instantiate()
|
||||
|
||||
# simulate until program terminates
|
||||
exit_event = m5.simulate(options.abs_max_tick)
|
||||
# Simulate until tester completes
|
||||
exit_event = m5.simulate()
|
||||
|
||||
print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
|
||||
print('Exiting tick: ', m5.curTick())
|
||||
print('Exiting because ', exit_event.getCause())
|
||||
|
||||
39
src/cpu/testers/gpu_ruby_test/CpuThread.py
Normal file
39
src/cpu/testers/gpu_ruby_test/CpuThread.py
Normal file
@@ -0,0 +1,39 @@
|
||||
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# For use for simulation and test purposes only
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from this
|
||||
# software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from m5.params import *
|
||||
from m5.proxy import *
|
||||
|
||||
from m5.objects.GpuThread import GpuThread
|
||||
|
||||
class CpuThread(GpuThread):
|
||||
type = 'CpuThread'
|
||||
cxx_header = "cpu/testers/gpu_ruby_test/cpu_thread.hh"
|
||||
42
src/cpu/testers/gpu_ruby_test/GpuThread.py
Normal file
42
src/cpu/testers/gpu_ruby_test/GpuThread.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# For use for simulation and test purposes only
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from this
|
||||
# software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from m5.objects.ClockedObject import ClockedObject
|
||||
from m5.params import *
|
||||
from m5.proxy import *
|
||||
|
||||
class GpuThread(ClockedObject):
|
||||
type = 'GpuThread'
|
||||
abstract = True
|
||||
cxx_header = "cpu/testers/gpu_ruby_test/gpu_thread.hh"
|
||||
thread_id = Param.Int("Unique GpuThread ID")
|
||||
num_lanes = Param.Int("Number of lanes this thread has")
|
||||
deadlock_threshold = Param.Cycles(1000000000, "Deadlock threshold")
|
||||
40
src/cpu/testers/gpu_ruby_test/GpuWavefront.py
Normal file
40
src/cpu/testers/gpu_ruby_test/GpuWavefront.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# For use for simulation and test purposes only
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from this
|
||||
# software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from m5.params import *
|
||||
from m5.proxy import *
|
||||
|
||||
from m5.objects.GpuThread import GpuThread
|
||||
|
||||
class GpuWavefront(GpuThread):
|
||||
type = 'GpuWavefront'
|
||||
cxx_header = "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
|
||||
cu_id = Param.Int("Compute Unit ID")
|
||||
64
src/cpu/testers/gpu_ruby_test/ProtocolTester.py
Normal file
64
src/cpu/testers/gpu_ruby_test/ProtocolTester.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# For use for simulation and test purposes only
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from this
|
||||
# software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from m5.objects.ClockedObject import ClockedObject
|
||||
from m5.params import *
|
||||
from m5.proxy import *
|
||||
|
||||
class ProtocolTester(ClockedObject):
|
||||
type = 'ProtocolTester'
|
||||
cxx_header = "cpu/testers/gpu_ruby_test/protocol_tester.hh"
|
||||
|
||||
cpu_ports = VectorRequestPort("Ports for CPUs")
|
||||
cu_vector_ports = VectorRequestPort("Vector ports for GPUs")
|
||||
cu_sqc_ports = VectorRequestPort("SQC ports for GPUs")
|
||||
cu_scalar_ports = VectorRequestPort("Scalar ports for GPUs")
|
||||
|
||||
cus_per_sqc = Param.Int(4, "Number of CUs per SQC")
|
||||
cus_per_scalar = Param.Int(4, "Number of CUs per scalar cache")
|
||||
|
||||
wavefronts_per_cu = Param.Int(1, "Number of wavefronts per CU")
|
||||
workitems_per_wavefront = Param.Int(64, "Number of workitems per wf")
|
||||
|
||||
cpu_threads = VectorParam.CpuThread("All cpus")
|
||||
wavefronts = VectorParam.GpuWavefront("All wavefronts")
|
||||
|
||||
num_atomic_locations = Param.Int(2, "Number of atomic locations")
|
||||
num_normal_locs_per_atomic = Param.Int(1000, \
|
||||
"Number of normal locations per atomic")
|
||||
|
||||
episode_length = Param.Int(10, "Number of actions per episode")
|
||||
max_num_episodes = Param.Int(20, "Maximum number of episodes")
|
||||
debug_tester = Param.Bool(False, "Are we debugging the tester?")
|
||||
random_seed = Param.Int(0, "Random seed number. Default value (0) means \
|
||||
using runtime-specific value.")
|
||||
log_file = Param.String("Log file's name")
|
||||
system = Param.System(Parent.any, "System we belong to")
|
||||
129
src/cpu/testers/gpu_ruby_test/README
Normal file
129
src/cpu/testers/gpu_ruby_test/README
Normal file
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
This directory contains a tester for gem5 GPU protocols. Unlike the Ruby random
|
||||
teter, this tester does not rely on sequential consistency. Instead, it
|
||||
assumes tested protocols supports release consistency.
|
||||
|
||||
----- Getting Started -----
|
||||
|
||||
To start using the tester quickly, you can use the following example command
|
||||
line to get running immediately:
|
||||
|
||||
build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py \
|
||||
--test-length=1000 --system-size=medium --cache-size=small
|
||||
|
||||
An overview of the main command line options is as follows. For all options
|
||||
use `build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py --help`
|
||||
or see the configuration file.
|
||||
|
||||
* --cache-size (small, large): Use smaller sizes for testing evict, etc.
|
||||
* --system-size (small, medium, large): Effectively the number of threads in
|
||||
the GPU model. Large size will have more contention. Larger
|
||||
sizes are useful for checking contention.
|
||||
* --episode-length (short, medium, long): Number of loads and stores in an
|
||||
episode. Episodes will also have atomics mixed in. See below
|
||||
for a definition of episode.
|
||||
* --test-length (int): Number of episodes to execute. This will determine the
|
||||
amount of time the tester runs for. Longer time will stress
|
||||
the protocol harder.
|
||||
|
||||
The remainder of this file describes the theory behind the tester design and
|
||||
a link to a more detailed research paper is provided at the end.
|
||||
|
||||
----- Theory Overview -----
|
||||
|
||||
The GPU Ruby tester creates a system consisting of both CPU threads and GPU
|
||||
wavefronts. CPU threads are scalar, so there is one lane per CPU thread. GPU
|
||||
wavefront may have multiple lanes. The number of lanes is initialized when
|
||||
a thread/wavefront is created.
|
||||
|
||||
Each thread/wavefront executes a number of episodes. Each episode is a series
|
||||
of memory actions (i.e., atomic, load, store, acquire and release). In a
|
||||
wavefront, all lanes execute the same sequence of actions, but they may target
|
||||
different addresses. One can think of an episode as a critical section which
|
||||
is bounded by a lock acquire in the beginning and a lock release at the end. An
|
||||
episode consists of actions in the following order:
|
||||
|
||||
1 - Atomic action
|
||||
2 - Acquire action
|
||||
3 - A number of load and store actions
|
||||
4 - Release action
|
||||
5 - Atomic action that targets the same address as (1) does
|
||||
|
||||
There are two separate set of addresses: atomic and non-atomic. Atomic actions
|
||||
target only atomic addresses. Load and store actions target only non-atomic
|
||||
addresses. Memory addresses are all 4-byte aligned in the tester.
|
||||
|
||||
To test false sharing cases in which both atomic and non-atomic addresses are
|
||||
placed in the same cache line, we abstract out the concept of memory addresses
|
||||
from the tester's perspective by introducing the concept of location. Locations
|
||||
are numbered from 0 to N-1 (if there are N addresses). The first X locations
|
||||
[0..X-1] are atomic locations, and the rest are non-atomic locations.
|
||||
The 1-1 mapping between locations and addresses is randomly created when the
|
||||
tester is initialized.
|
||||
|
||||
Per load and store action, its target location is selected so that there is no
|
||||
data race in the generated stream of memory requests at any time during the
|
||||
test. Since in the Data-Race-Free model, the memory system's behavior is
undefined
|
||||
in data race cases, we exclude data race scenarios from our protocol test.
|
||||
|
||||
Once the location for each load/store action is determined, each
thread/wavefront either
|
||||
loads current value at the location or stores an incremental value to that
|
||||
location. The tester maintains a table tracking all last writers and their
|
||||
written values, so we know what value should be returned from a load and what
|
||||
value should be written next at a particular location. Value returned from a
|
||||
load must match with the value written by the last writer.
|
||||
|
||||
----- Directory Structure -----
|
||||
|
||||
ProtocolTester.hh/cc -- This is the main tester class that orchestrates the
|
||||
entire test.
|
||||
AddressManager.hh/cc -- This manages address space, randomly maps address to
|
||||
location, generates locations for all episodes,
|
||||
maintains per-location last writer and validates
|
||||
values returned from load actions.
|
||||
GpuThread.hh/cc -- This is abstract class for CPU threads and GPU
|
||||
wavefronts. It generates and executes a series of
|
||||
episodes.
|
||||
CpuThread.hh/cc -- Thread class for CPU threads. Not fully implemented yet
|
||||
GpuWavefront.hh/cc -- GpuThread class for GPU wavefronts.
|
||||
Episode.hh/cc -- Class to encapsulate an episode, notably including
|
||||
episode load/store structure and ordering.
|
||||
|
||||
For more detail, please see the following paper:
|
||||
|
||||
T. Ta, X. Zhang, A. Gutierrez and B. M. Beckmann, "Autonomous Data-Race-Free
|
||||
GPU Testing," 2019 IEEE International Symposium on Workload Characterization
|
||||
(IISWC), Orlando, FL, USA, 2019, pp. 81-92, doi:
|
||||
10.1109/IISWC47752.2019.9042019.
|
||||
54
src/cpu/testers/gpu_ruby_test/SConscript
Normal file
54
src/cpu/testers/gpu_ruby_test/SConscript
Normal file
@@ -0,0 +1,54 @@
|
||||
#
|
||||
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# For use for simulation and test purposes only
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from this
|
||||
# software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
|
||||
Import('*')
|
||||
|
||||
if not env['BUILD_GPU']:
|
||||
Return()
|
||||
|
||||
if env['PROTOCOL'] == 'None':
|
||||
Return()
|
||||
|
||||
SimObject('ProtocolTester.py')
|
||||
SimObject('GpuThread.py')
|
||||
SimObject('CpuThread.py')
|
||||
SimObject('GpuWavefront.py')
|
||||
|
||||
Source('address_manager.cc')
|
||||
Source('episode.cc')
|
||||
Source('protocol_tester.cc')
|
||||
Source('gpu_thread.cc')
|
||||
Source('cpu_thread.cc')
|
||||
Source('gpu_wavefront.cc')
|
||||
|
||||
DebugFlag('ProtocolTest')
|
||||
431
src/cpu/testers/gpu_ruby_test/address_manager.cc
Normal file
431
src/cpu/testers/gpu_ruby_test/address_manager.cc
Normal file
@@ -0,0 +1,431 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/address_manager.hh"

#include <algorithm>
#include <random>
#include <utility>

#include "base/intmath.hh"
#include "base/logging.hh"
#include "base/random.hh"
#include "base/trace.hh"
|
||||
|
||||
const int AddressManager::INVALID_VALUE = -1;
|
||||
const int AddressManager::INVALID_LOCATION = -1;
|
||||
|
||||
AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic)
|
||||
: numAtomicLocs(n_atomic_locs),
|
||||
numLocsPerAtomic(n_normal_locs_per_atomic)
|
||||
{
|
||||
assert(numAtomicLocs > 0 && numLocsPerAtomic > 0);
|
||||
numNormalLocs = numAtomicLocs * numLocsPerAtomic;
|
||||
|
||||
// generate random address map
|
||||
randAddressMap.resize(numAtomicLocs + numNormalLocs);
|
||||
for (Location i = 0; i < numAtomicLocs + numNormalLocs; ++i) {
|
||||
// all addresses are sizeof(Value) (i.e., 4-byte) aligned
|
||||
randAddressMap[i] = (Addr)((i + 128) << floorLog2(sizeof(Value)));
|
||||
}
|
||||
|
||||
// randomly shuffle randAddressMap
|
||||
std::random_shuffle(randAddressMap.begin(), randAddressMap.end());
|
||||
|
||||
// initialize atomic locations
|
||||
// first and last normal location per atomic location
|
||||
Location first, last;
|
||||
for (Location atomic_loc = 0; atomic_loc < numAtomicLocs; ++atomic_loc) {
|
||||
first = numAtomicLocs + numLocsPerAtomic * atomic_loc;
|
||||
last = first + numLocsPerAtomic - 1;
|
||||
atomicStructs.push_back(new AtomicStruct(atomic_loc, first, last));
|
||||
}
|
||||
|
||||
// initialize log table
|
||||
for (Location loc = 0; loc < numAtomicLocs + numNormalLocs; ++loc) {
|
||||
logTable.push_back(new LastWriter());
|
||||
}
|
||||
}
|
||||
|
||||
AddressManager::~AddressManager()
|
||||
{
|
||||
for (AtomicStruct* atomic_struct : atomicStructs)
|
||||
delete atomic_struct;
|
||||
for (LastWriter* lw : logTable)
|
||||
delete lw;
|
||||
}
|
||||
|
||||
Addr
|
||||
AddressManager::getAddress(Location loc)
|
||||
{
|
||||
assert(loc < numAtomicLocs + numNormalLocs && loc >= 0);
|
||||
return randAddressMap[loc];
|
||||
}
|
||||
|
||||
AddressManager::Location
|
||||
AddressManager::getAtomicLoc()
|
||||
{
|
||||
Location ret_atomic_loc = random() % numAtomicLocs;
|
||||
atomicStructs[ret_atomic_loc]->startLocSelection();
|
||||
return ret_atomic_loc;
|
||||
}
|
||||
|
||||
AddressManager::Location
|
||||
AddressManager::getLoadLoc(Location atomic_loc)
|
||||
{
|
||||
assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
|
||||
return atomicStructs[atomic_loc]->getLoadLoc();
|
||||
}
|
||||
|
||||
AddressManager::Location
|
||||
AddressManager::getStoreLoc(Location atomic_loc)
|
||||
{
|
||||
assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
|
||||
return atomicStructs[atomic_loc]->getStoreLoc();
|
||||
}
|
||||
|
||||
void
|
||||
AddressManager::finishLocSelection(Location atomic_loc)
|
||||
{
|
||||
assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
|
||||
atomicStructs[atomic_loc]->endLocSelection();
|
||||
}
|
||||
|
||||
void
|
||||
AddressManager::releaseLocation(Location atomic_loc, Location loc)
|
||||
{
|
||||
assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
|
||||
atomicStructs[atomic_loc]->releaseLoc(loc);
|
||||
}
|
||||
|
||||
std::string
|
||||
AddressManager::printLastWriter(Location loc) const
|
||||
{
|
||||
return logTable[loc]->print();
|
||||
}
|
||||
|
||||
// ------------------- AtomicStruct --------------------------
|
||||
AddressManager::AtomicStruct::AtomicStruct(Location atomic_loc,
|
||||
Location loc_begin,
|
||||
Location loc_end)
|
||||
{
|
||||
// the location range must have at least 1 location
|
||||
assert(loc_begin <= loc_end);
|
||||
|
||||
atomicLoc = atomic_loc;
|
||||
arraySize = loc_end - loc_begin + 1;
|
||||
locationBase = loc_begin;
|
||||
|
||||
// allocate an array of arrray_size
|
||||
locArray = new Location[arraySize];
|
||||
|
||||
// initialize locArray & locProps
|
||||
Location loc;
|
||||
for (int offset = 0; offset < arraySize; ++offset) {
|
||||
loc = locationBase + offset;
|
||||
locArray[offset] = loc;
|
||||
locProps.push_back(LocProperty(offset, 0));
|
||||
}
|
||||
|
||||
// region (1) and (3) are initially empty
|
||||
firstMark = 0;
|
||||
secondMark = arraySize;
|
||||
// no request made at this location so far
|
||||
requestCount = 0;
|
||||
}
|
||||
|
||||
AddressManager::AtomicStruct::~AtomicStruct()
|
||||
{
|
||||
delete[] locArray;
|
||||
}
|
||||
|
||||
void
|
||||
AddressManager::AtomicStruct::startLocSelection()
|
||||
{
|
||||
assert(firstMark >= 0);
|
||||
assert(firstMark <= secondMark);
|
||||
assert(secondMark <= arraySize);
|
||||
// make sure loadStoreMap has been cleared
|
||||
assert(loadStoreMap.empty());
|
||||
|
||||
// this atomic location is picked for Atomic_ACQ
|
||||
// and Atomic_REL in an episode
|
||||
requestCount += 2;
|
||||
// add two expected values in expectedValues set
|
||||
expectedValues.insert(requestCount - 1);
|
||||
expectedValues.insert(requestCount - 2);
|
||||
}
|
||||
|
||||
AddressManager::Location
|
||||
AddressManager::AtomicStruct::getLoadLoc()
|
||||
{
|
||||
assert(firstMark >= 0);
|
||||
assert(firstMark <= secondMark);
|
||||
assert(secondMark <= arraySize);
|
||||
|
||||
if (firstMark == arraySize) {
|
||||
// no location can be picked for a LD now, so return an empty location
|
||||
return INVALID_LOCATION;
|
||||
} else {
|
||||
// we can pick any location btw
|
||||
// locArray [firstMark : arraySize-1]
|
||||
int range_size = arraySize - firstMark;
|
||||
Location ret_loc = locArray[firstMark + random() % range_size];
|
||||
|
||||
// update loadStoreMap
|
||||
LdStMap::iterator it = loadStoreMap.find(ret_loc);
|
||||
|
||||
if (it == loadStoreMap.end()) {
|
||||
// insert a new entry to the map b/c the entry is not there yet
|
||||
// to mark this location has been picked for a LD
|
||||
loadStoreMap.insert(std::pair<Location, LdStBits>
|
||||
(ret_loc, LdStBits(true,false)));
|
||||
} else {
|
||||
// otherwise, just update the LD bit
|
||||
(it->second).first = true;
|
||||
}
|
||||
|
||||
return ret_loc;
|
||||
}
|
||||
}
|
||||
|
||||
AddressManager::Location
|
||||
AddressManager::AtomicStruct::getStoreLoc()
|
||||
{
|
||||
assert(firstMark >= 0);
|
||||
assert(firstMark <= secondMark);
|
||||
assert(secondMark <= arraySize);
|
||||
|
||||
if (firstMark == secondMark) {
|
||||
// no location can be picked for a ST now, return an invalid location
|
||||
return INVALID_LOCATION;
|
||||
} else {
|
||||
// we can pick any location btw [firstMark : secondMark-1]
|
||||
int range_size = secondMark - firstMark;
|
||||
Location ret_loc = locArray[firstMark + random() % range_size];
|
||||
|
||||
// update loadStoreMap
|
||||
LdStMap::iterator it = loadStoreMap.find(ret_loc);
|
||||
|
||||
if (it == loadStoreMap.end()) {
|
||||
// insert a new entry to the map b/c the entry is not there yet
|
||||
// to mark this location has been picked for a ST
|
||||
loadStoreMap.insert(std::pair<Location, LdStBits>
|
||||
(ret_loc, LdStBits(false,true)));
|
||||
} else {
|
||||
// otherwise, just update the ST bit
|
||||
(it->second).second = true;
|
||||
}
|
||||
|
||||
return ret_loc;
|
||||
}
|
||||
}
|
||||
|
||||
// for each entry in loadStoreMap,
|
||||
// if <LD_bit, ST_bit> == <1,0>
|
||||
// - if the location is in (2), then move it to (3)
|
||||
// - if the location is in (3), no move
|
||||
// - otherwise, throw an error
|
||||
// if <LD_bit, ST_bit> == <0,1> or <1,1>
|
||||
// - move it from (2) to (1)
|
||||
void
|
||||
AddressManager::AtomicStruct::endLocSelection()
|
||||
{
|
||||
assert(firstMark >= 0);
|
||||
assert(firstMark <= secondMark);
|
||||
assert(secondMark <= arraySize);
|
||||
|
||||
for (auto& it : loadStoreMap) {
|
||||
Location loc = it.first;
|
||||
LdStBits p = it.second;
|
||||
|
||||
assert(loc >= locationBase && loc < locationBase + arraySize);
|
||||
LocProperty& loc_prop = locProps[loc - locationBase];
|
||||
|
||||
if (p.first && !p.second) {
|
||||
// this location has been picked for LD(s) but not ST
|
||||
// it must be in either region (2) or (3)
|
||||
assert(inSecondRegion(loc_prop.first) ||
|
||||
inThirdRegion(loc_prop.first));
|
||||
|
||||
if (inSecondRegion(loc_prop.first)) {
|
||||
// there is no owner of this location yet
|
||||
assert(loc_prop.second == 0);
|
||||
|
||||
// pick the last location in (2) to swap
|
||||
Location swapped_loc = locArray[secondMark - 1];
|
||||
LocProperty& swapped_loc_prop =
|
||||
locProps[swapped_loc - locationBase];
|
||||
|
||||
// swap loc and swapped_loc
|
||||
swap(loc_prop, swapped_loc_prop);
|
||||
|
||||
// then, expand (3)
|
||||
secondMark--;
|
||||
}
|
||||
|
||||
// increment the location's number of owners
|
||||
loc_prop.second++;
|
||||
} else if (p.second) {
|
||||
// this location has been picked for ST(s) and/or LD(s)
|
||||
// it must be in region (2)
|
||||
assert(inSecondRegion(loc_prop.first) && loc_prop.second == 0);
|
||||
|
||||
// pick the first location in (2) to swap
|
||||
Location swapped_loc = locArray[firstMark];
|
||||
LocProperty& swapped_loc_prop =
|
||||
locProps[swapped_loc - locationBase];
|
||||
|
||||
// swap loc and swapped_loc
|
||||
swap(loc_prop, swapped_loc_prop);
|
||||
|
||||
// then, expand (1)
|
||||
firstMark++;
|
||||
|
||||
// increment the location's number of owners
|
||||
loc_prop.second++;
|
||||
} else {
|
||||
panic("Location in loadStoreMap but wasn't picked in any"
|
||||
" action\n");
|
||||
}
|
||||
}
|
||||
|
||||
// clear the ld_st_map
|
||||
loadStoreMap.clear();
|
||||
}
|
||||
|
||||
void
|
||||
AddressManager::AtomicStruct::releaseLoc(Location loc)
|
||||
{
|
||||
assert(loc >= locationBase && loc < locationBase + arraySize);
|
||||
|
||||
LocProperty& loc_prop = locProps[loc - locationBase];
|
||||
|
||||
if (inFirstRegion(loc_prop.first)) {
|
||||
// this location must have exactly 1 owner
|
||||
assert(loc_prop.second == 1);
|
||||
|
||||
// pick the last location in region 1 to swap
|
||||
Location swapped_loc = locArray[firstMark - 1];
|
||||
LocProperty& swapped_loc_prop = locProps[swapped_loc - locationBase];
|
||||
|
||||
// swap loc and swapped_loc
|
||||
swap(loc_prop, swapped_loc_prop);
|
||||
|
||||
// then shrink (1)
|
||||
firstMark--;
|
||||
|
||||
// reset the location's number of owners
|
||||
loc_prop.second = 0;
|
||||
} else if (inThirdRegion(loc_prop.first)) {
|
||||
// this location must have at least 1 owner
|
||||
assert(loc_prop.second >= 1);
|
||||
|
||||
if (loc_prop.second == 1) {
|
||||
// pick the first location in region 3 to swap
|
||||
Location swapped_loc = locArray[secondMark];
|
||||
LocProperty& swapped_loc_prop =
|
||||
locProps[swapped_loc - locationBase];
|
||||
|
||||
// swap loc and swapped_loc
|
||||
swap(loc_prop, swapped_loc_prop);
|
||||
|
||||
// then shrink (3)
|
||||
secondMark++;
|
||||
}
|
||||
// decrement the loc's number of owners
|
||||
loc_prop.second--;
|
||||
} else {
|
||||
// some one else must already reset this counter
|
||||
assert(inSecondRegion(loc_prop.first) && loc_prop.second == 0);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
AddressManager::AtomicStruct::isExpectedValue(Value val)
|
||||
{
|
||||
ExpectedValueSet::iterator it = expectedValues.find(val);
|
||||
|
||||
if (it == expectedValues.end()) {
|
||||
std::stringstream exp_val_ss;
|
||||
for (auto& val : expectedValues) {
|
||||
exp_val_ss << " " << val;
|
||||
}
|
||||
|
||||
warn("Expected return values are:\n\t%s\n", exp_val_ss.str());
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// erase this value b/c it's done
|
||||
expectedValues.erase(it);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
AddressManager::AtomicStruct::swap(LocProperty& prop_1, LocProperty& prop_2)
|
||||
{
|
||||
int new_idx_1 = prop_2.first;
|
||||
int new_idx_2 = prop_1.first;
|
||||
|
||||
// swap the two locations in locArray
|
||||
Location tmp = locArray[prop_1.first];
|
||||
locArray[prop_1.first] = locArray[prop_2.first];
|
||||
locArray[prop_2.first] = tmp;
|
||||
|
||||
// update their new indices
|
||||
prop_1.first = new_idx_1;
|
||||
prop_2.first = new_idx_2;
|
||||
}
|
||||
|
||||
// ------------------ log table ---------------------
|
||||
void
|
||||
AddressManager::updateLogTable(Location loc, int thread_id, int episode_id,
|
||||
Value new_value, Tick cur_tick, int cu_id)
|
||||
{
|
||||
assert(loc >= 0 && loc < numAtomicLocs + numNormalLocs);
|
||||
logTable[loc]->update(thread_id, cu_id, episode_id, new_value, cur_tick);
|
||||
}
|
||||
|
||||
AddressManager::Value
|
||||
AddressManager::getLoggedValue(Location loc) const
|
||||
{
|
||||
assert(loc >= 0 && loc < numAtomicLocs + numNormalLocs);
|
||||
return logTable[loc]->getLastStoredValue();
|
||||
}
|
||||
|
||||
bool
|
||||
AddressManager::validateAtomicResp(Location loc, Value ret_val)
|
||||
{
|
||||
assert(loc >= 0 && loc < numAtomicLocs);
|
||||
return atomicStructs[loc]->isExpectedValue(ret_val);
|
||||
}
|
||||
274
src/cpu/testers/gpu_ruby_test/address_manager.hh
Normal file
274
src/cpu/testers/gpu_ruby_test/address_manager.hh
Normal file
@@ -0,0 +1,274 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_
|
||||
#define CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_
|
||||
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "base/types.hh"
|
||||
#include "sim/eventq.hh"
|
||||
|
||||
/*
|
||||
* --- AddressManager has 3 main tasks ---
|
||||
* (1) generate DRF request sequences
|
||||
* (2) maintain internal log table
|
||||
* (3) validate return values against ones in the log table
|
||||
*
|
||||
* A location is an abstract index of a unique real address.
|
||||
* It's used internally within the tester only.
|
||||
* randAddressMap has the mapping between a location and its real address.
|
||||
*
|
||||
* A value is an integer that a location in real memory can store.
|
||||
* for now, we assume a value is 4-byte
|
||||
*
|
||||
* The location range (randAddressMap) has two distinct parts:
|
||||
* Atomic locations: in the 1st part of randAddressMap &
|
||||
* Non-atomic locations (or just locations): in the 2nd part
|
||||
*/
|
||||
|
||||
/*
|
||||
* --- DRF request sequence generation ---
|
||||
* Each lane of an episode starts selecting its location by calling:
|
||||
* (1) getAtomicLoc
|
||||
* (2) getLoadLoc/getStoreLoc
|
||||
* (3) finishLocSelection
|
||||
*
|
||||
* Each lane of an episode completes its executing by calling:
|
||||
* releaseLocation for all locations it selected
|
||||
*/
|
||||
|
||||
/*
|
||||
* --- Internal structures ---
|
||||
* There are multiple atomic structures, each of which corresponds
|
||||
* to an atomic location.
|
||||
*
|
||||
* Each atomic structure manages a distinct range of locations in locArray
|
||||
* This array is partitioned into 3 parts that are used to select locations
|
||||
* for LDs and STs. Here is the location selecting rule:
|
||||
* | (1) | (2) | (3) |
|
||||
* - all locations in (1) cannot be picked for any LD and ST action
|
||||
* - all locations in (2) can be picked for either LD or ST action
|
||||
* - all locations in (3) can be picked for LD action only
|
||||
*
|
||||
* We maintain the 3 parts by 2 indices firstMark and secondMark.
|
||||
* As locations are moved between partitions, both indices are updated
|
||||
* accordingly.
|
||||
* [0 .. firstMark-1] part (1)
|
||||
* [firstMark .. secondMark-1] part (2)
|
||||
* [secondMark .. arraySize-1] part (3)
|
||||
*
|
||||
* Each location has its context/property. locProps maintains
|
||||
* contexts/properties of all locations. Context/property includes
|
||||
* - current index of a location in locArray
|
||||
* - the number of owners who are currently using the location
|
||||
*
|
||||
* To guarantee DRF constraints, the following conditions must hold
|
||||
* - all locations in (1) have exactly 1 owner
|
||||
* - all locations in (2) have exactly 0 owner
|
||||
* - all locations in (3) have at least 1 owner
|
||||
* - A LD request can randomly pick any location in (2) & (3)
|
||||
* - A ST request can randomly pick any location in (2)
|
||||
*
|
||||
* loadStoreMap maintains all locations already selected for LDs/STs so far
|
||||
*
|
||||
* When endLocSelection is called (i.e., we've picked all locations for an
|
||||
* episode), we need to move each selected location to its right partition.
|
||||
* if LD_bit == 1 && ST_bit == 0 (i.e., picked for LDs), then move the
|
||||
* location to (3) -> future LDs can pick it.
|
||||
* if LD_bit == 0 && ST_bit == 1, then move the location to (1) -> NO future
|
||||
* action can pick it until this episode is done.
|
||||
* if LD_bit == 1 && ST_bit == 1, then move the location to (1) -> NO future
|
||||
* action can pick it until this episode is done.
|
||||
* clear the loadStoreMap
|
||||
*/
|
||||
|
||||
class AddressManager
|
||||
{
|
||||
public:
|
||||
AddressManager(int n_atomic_locs, int numNormalLocsPerAtomic);
|
||||
~AddressManager();
|
||||
|
||||
typedef int32_t Value;
|
||||
typedef int32_t Location;
|
||||
|
||||
// return the unique address mapped to a location
|
||||
Addr getAddress(Location loc);
|
||||
// return a unique atomic location & start picking locations
|
||||
Location getAtomicLoc();
|
||||
// return a random location for LD
|
||||
Location getLoadLoc(Location atomic_loc);
|
||||
// return a random location for ST
|
||||
Location getStoreLoc(Location atomic_loc);
|
||||
// finish picking locations
|
||||
void finishLocSelection(Location atomic_loc);
|
||||
// an episode is done, release location I've picked
|
||||
void releaseLocation(Location atomic_loc, Location loc);
|
||||
// update a log table entry with a given set of values
|
||||
void updateLogTable(Location loc, int threadId, int episodeId,
|
||||
Value new_value, Tick curTick, int cuId = -1);
|
||||
// return the current value in the log table
|
||||
Value getLoggedValue(Location loc) const;
|
||||
// validate atomic response
|
||||
bool validateAtomicResp(Location loc, Value ret_val);
|
||||
|
||||
std::string printLastWriter(Location loc) const;
|
||||
|
||||
static const int INVALID_VALUE;
|
||||
static const int INVALID_LOCATION;
|
||||
|
||||
private:
|
||||
class LastWriter
|
||||
{
|
||||
public:
|
||||
LastWriter()
|
||||
: threadId(-1), cuId(-1), episodeId(-1), value(0),
|
||||
writeTick(0)
|
||||
{ }
|
||||
|
||||
const std::string print() const
|
||||
{
|
||||
return "(GpuThread ID " + std::to_string(threadId) +
|
||||
", CU ID " + std::to_string(cuId) +
|
||||
", Episode ID " + std::to_string(episodeId) +
|
||||
", Value " + std::to_string(value) +
|
||||
", Tick " + std::to_string(writeTick) +
|
||||
")";
|
||||
}
|
||||
|
||||
void update(int _thread, int _cu, int _episode, Value _value,
|
||||
Tick _tick)
|
||||
{
|
||||
threadId = _thread;
|
||||
cuId = _cu;
|
||||
episodeId = _episode;
|
||||
value = _value;
|
||||
writeTick = _tick;
|
||||
}
|
||||
|
||||
Value getLastStoredValue() const { return value; }
|
||||
|
||||
private:
|
||||
int threadId;
|
||||
int cuId;
|
||||
int episodeId;
|
||||
Value value;
|
||||
Tick writeTick;
|
||||
};
|
||||
|
||||
class AtomicStruct
|
||||
{
|
||||
public:
|
||||
AtomicStruct(Location atom_loc, Location loc_begin, Location loc_end);
|
||||
~AtomicStruct();
|
||||
|
||||
// functions picking locations for LD/ST/ATOMIC ops
|
||||
void startLocSelection();
|
||||
Location getLoadLoc();
|
||||
Location getStoreLoc();
|
||||
void endLocSelection();
|
||||
|
||||
// an episode completed its actions
|
||||
// return locations to their correct positions
|
||||
void releaseLoc(Location loc);
|
||||
// is the value what we expect?
|
||||
bool isExpectedValue(Value val);
|
||||
|
||||
private:
|
||||
Location atomicLoc;
|
||||
Location locationBase;
|
||||
|
||||
// array storing all locations this structure is managing
|
||||
Location* locArray;
|
||||
int firstMark, secondMark;
|
||||
int arraySize;
|
||||
|
||||
// a vector of location's properties
|
||||
typedef std::pair<int, int> LocProperty;
|
||||
typedef std::vector<LocProperty> LocPropTable;
|
||||
LocPropTable locProps;
|
||||
|
||||
// a temporary map of location and its LD/ST selection
|
||||
typedef std::pair<bool, bool> LdStBits;
|
||||
typedef std::unordered_map<Location, LdStBits> LdStMap;
|
||||
LdStMap loadStoreMap;
|
||||
|
||||
// number of atomic requests at this location so far
|
||||
int requestCount;
|
||||
// a set of expected values
|
||||
// when we request the first n atomic ops, we expect to receive n
|
||||
// return values from [0 .. n-1]
|
||||
typedef std::unordered_set<Value> ExpectedValueSet;
|
||||
ExpectedValueSet expectedValues;
|
||||
|
||||
// swap two locations in locArray
|
||||
void swap(LocProperty& prop_1, LocProperty& prop_2);
|
||||
|
||||
bool inFirstRegion(int idx) const
|
||||
{
|
||||
return (idx >= 0 && idx < firstMark);
|
||||
}
|
||||
bool inSecondRegion(int idx) const
|
||||
{
|
||||
return (idx >= firstMark && idx < secondMark);
|
||||
}
|
||||
bool inThirdRegion(int idx) const
|
||||
{
|
||||
return (idx >= secondMark && idx < arraySize);
|
||||
}
|
||||
};
|
||||
|
||||
// number of atomic locations
|
||||
int numAtomicLocs;
|
||||
// number of normal/non-atomic locations per atomic structure
|
||||
int numLocsPerAtomic;
|
||||
// total number of non-atomic locations
|
||||
int numNormalLocs;
|
||||
|
||||
// location - address mapping
|
||||
typedef std::vector<Addr> AddressMap;
|
||||
AddressMap randAddressMap;
|
||||
|
||||
// a list of atomic structures
|
||||
typedef std::vector<AtomicStruct*> AtomicStructTable;
|
||||
AtomicStructTable atomicStructs;
|
||||
|
||||
// internal log table
|
||||
typedef std::vector<LastWriter*> LogTable;
|
||||
LogTable logTable;
|
||||
};
|
||||
|
||||
#endif /* CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_ */
|
||||
123
src/cpu/testers/gpu_ruby_test/cpu_thread.cc
Normal file
123
src/cpu/testers/gpu_ruby_test/cpu_thread.cc
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/cpu_thread.hh"
|
||||
|
||||
#include "debug/ProtocolTest.hh"
|
||||
|
||||
// Construct a single-lane tester thread that models a CPU.
// All real state lives in the GpuThread base; this ctor only labels
// the thread and enforces the scalar (one-lane) invariant.
CpuThread::CpuThread(const Params &p)
    : GpuThread(p)
{
    // a CPU thread is scalar: exactly one lane
    assert(numLanes == 1);

    threadName = "CpuThread(Thread ID " + std::to_string(threadId) + ")";
    threadEvent.setDesc("CpuThread tick");
}
|
||||
|
||||
// Param-struct factory hook: build a CpuThread from its SimObject params.
CpuThread*
CpuThreadParams::create() const
{
    CpuThread *thread = new CpuThread(*this);
    return thread;
}
|
||||
|
||||
void
|
||||
CpuThread::issueLoadOps()
|
||||
{
|
||||
assert(curAction);
|
||||
assert(curAction->getType() == Episode::Action::Type::LOAD);
|
||||
// we should not have any outstanding fence or atomic op at this point
|
||||
assert(pendingFenceCount == 0);
|
||||
assert(pendingAtomicCount == 0);
|
||||
|
||||
fatal("CpuThread::issueLoadOps - not yet implemented");
|
||||
}
|
||||
|
||||
void
|
||||
CpuThread::issueStoreOps()
|
||||
{
|
||||
assert(curAction);
|
||||
assert(curAction->getType() == Episode::Action::Type::STORE);
|
||||
// we should not have any outstanding fence or atomic op at this point
|
||||
assert(pendingFenceCount == 0);
|
||||
assert(pendingAtomicCount == 0);
|
||||
|
||||
fatal("CpuThread::issueStoreOps - not yet implemented");
|
||||
}
|
||||
|
||||
void
|
||||
CpuThread::issueAtomicOps()
|
||||
{
|
||||
assert(curAction);
|
||||
assert(curAction->getType() == Episode::Action::Type::ATOMIC);
|
||||
// we should not have any outstanding ops at this point
|
||||
assert(pendingFenceCount == 0);
|
||||
assert(pendingLdStCount == 0);
|
||||
assert(pendingAtomicCount == 0);
|
||||
|
||||
fatal("CpuThread::issueAtomicOps - not yet implemented");
|
||||
}
|
||||
|
||||
void
|
||||
CpuThread::issueAcquireOp()
|
||||
{
|
||||
DPRINTF(ProtocolTest, "Issuing Acquire Op ...\n");
|
||||
|
||||
assert(curAction);
|
||||
assert(curAction->getType() == Episode::Action::Type::ACQUIRE);
|
||||
// we should not have any outstanding ops at this point
|
||||
assert(pendingFenceCount == 0);
|
||||
assert(pendingLdStCount == 0);
|
||||
assert(pendingAtomicCount == 0);
|
||||
|
||||
// no-op: Acquire does not apply to CPU threads
|
||||
}
|
||||
|
||||
void
|
||||
CpuThread::issueReleaseOp()
|
||||
{
|
||||
DPRINTF(ProtocolTest, "Issuing Release Op ...\n");
|
||||
|
||||
assert(curAction);
|
||||
assert(curAction->getType() == Episode::Action::Type::RELEASE);
|
||||
// we should not have any outstanding ops at this point
|
||||
assert(pendingFenceCount == 0);
|
||||
assert(pendingLdStCount == 0);
|
||||
assert(pendingAtomicCount == 0);
|
||||
|
||||
// no-op: Release does not apply to CPU threads
|
||||
}
|
||||
|
||||
void
|
||||
CpuThread::hitCallback(PacketPtr pkt)
|
||||
{
|
||||
fatal("CpuThread::hitCallback - not yet implemented");
|
||||
}
|
||||
61
src/cpu/testers/gpu_ruby_test/cpu_thread.hh
Normal file
61
src/cpu/testers/gpu_ruby_test/cpu_thread.hh
Normal file
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_
|
||||
#define CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
|
||||
#include "params/CpuThread.hh"
|
||||
#include "sim/clocked_object.hh"
|
||||
|
||||
// A scalar (single-lane) tester thread modeling a CPU. It specializes
// GpuThread's action-issue hooks; in this revision every memory-issue
// path is an unimplemented stub (fatal) and the fence ops are no-ops.
class CpuThread : public GpuThread
{
  public:
    typedef CpuThreadParams Params;
    CpuThread(const Params &p);
    virtual ~CpuThread() = default;

    // convenience aliases for the tester's address-manager types
    typedef AddressManager::Location Location;
    typedef AddressManager::Value Value;

    // memory-system response callback (currently fatals — not implemented)
    void hitCallback(PacketPtr pkt);

  protected:
    // per-action issue hooks invoked by GpuThread::issueNextAction()
    void issueLoadOps();
    void issueStoreOps();
    void issueAtomicOps();
    void issueAcquireOp();   // no-op for CPU threads
    void issueReleaseOp();   // no-op for CPU threads
};
|
||||
|
||||
#endif /* CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_ */
|
||||
321
src/cpu/testers/gpu_ruby_test/episode.cc
Normal file
321
src/cpu/testers/gpu_ruby_test/episode.cc
Normal file
@@ -0,0 +1,321 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/episode.hh"
|
||||
|
||||
#include <fstream>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
|
||||
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
|
||||
|
||||
// Build one episode for a thread: reserve per-lane atomic locations,
// generate the randomized action sequence (via initActions), and mark
// the episode active. num_loads/num_stores fix the episode length.
Episode::Episode(ProtocolTester* _tester, GpuThread* _thread, int num_loads,
                 int num_stores)
      : tester(_tester),
        thread(_thread),
        numLoads(num_loads),
        numStores(num_stores),
        nextActionIdx(0)
{
    assert(tester && thread);

    // tester hands out globally unique episode ids
    episodeId = tester->getNextEpisodeID();
    numLanes = thread->getNumLanes();
    assert(numLanes > 0);

    addrManager = tester->getAddressManager();
    assert(addrManager);

    // one atomic location slot per lane; INVALID until initActions picks them
    atomicLocs.resize(numLanes, AddressManager::INVALID_LOCATION);
    // generate a sequence of actions
    initActions();
    isActive = true;

    DPRINTFN("Episode %d\n", episodeId);
}
|
||||
|
||||
// Free every heap-allocated Action owned by this episode.
Episode::~Episode()
{
    for (auto it = actions.begin(); it != actions.end(); ++it) {
        assert(*it);
        delete *it;
    }
}
|
||||
|
||||
// Peek at the action at the head of the queue without consuming it.
// Returns nullptr once every action has been popped.
const Episode::Action*
Episode::peekCurAction() const
{
    return (nextActionIdx < actions.size()) ? actions[nextActionIdx]
                                            : nullptr;
}
|
||||
|
||||
void
|
||||
Episode::popAction()
|
||||
{
|
||||
assert(nextActionIdx < actions.size());
|
||||
nextActionIdx++;
|
||||
}
|
||||
|
||||
// Build this episode's action sequence and bind every action to a
// per-lane memory location. The sequence is:
//   ATOMIC, ACQUIRE, <random mix of numLoads LOADs + numStores STOREs>,
//   RELEASE, ATOMIC
// Each lane is given one atomic location; each LOAD/STORE gets a normal
// location checked to be data-race-free against both other threads
// (tester->checkDRF) and earlier lanes of this episode (this->checkDRF).
void
Episode::initActions()
{
    // first, push Atomic & then Acquire action
    actions.push_back(new Action(Action::Type::ATOMIC, numLanes));
    actions.push_back(new Action(Action::Type::ACQUIRE, numLanes));

    // second, push a number of LD/ST actions in random interleaved order
    int num_loads = numLoads;
    int num_stores = numStores;
    while ((num_loads + num_stores) > 0) {
        switch (random() % 2) {
            case 0: // Load
                if (num_loads > 0) {
                    actions.push_back(new Action(Action::Type::LOAD,
                                                 numLanes));
                    num_loads--;
                }
                break;
            case 1: // Store
                if (num_stores > 0) {
                    actions.push_back(new Action(Action::Type::STORE,
                                                 numLanes));
                    num_stores--;
                }
                break;
            default:
                assert(false);
        }
    }

    // last, push a Release & then Atomic action
    actions.push_back(new Action(Action::Type::RELEASE, numLanes));
    actions.push_back(new Action(Action::Type::ATOMIC, numLanes));

    // for each lane, pick a list of locations
    Location normal_loc;

    for (int lane = 0; lane < numLanes; ++lane) {
        normal_loc = AddressManager::INVALID_LOCATION;

        // first, we select atomic loc for this lane
        // atomic loc for this lane should not have been picked yet
        assert(atomicLocs[lane] == AddressManager::INVALID_LOCATION);
        // pick randomly an atomic location
        atomicLocs[lane] = addrManager->getAtomicLoc();
        assert(atomicLocs[lane] >= 0);

        // go through each action in this lane and set its location
        for (Action* action : actions) {
            assert(action);

            switch (action->getType()) {
                case Action::Type::ATOMIC:
                    // both ATOMIC actions in a lane share one location
                    action->setLocation(lane, atomicLocs[lane]);
                    break;
                case Action::Type::LOAD:
                    // pick randomly a normal location
                    normal_loc = addrManager->
                                            getLoadLoc(atomicLocs[lane]);
                    assert(normal_loc >= AddressManager::INVALID_LOCATION);

                    if (normal_loc != AddressManager::INVALID_LOCATION) {
                        // check DRF against other threads and prior lanes
                        if (!tester->checkDRF(atomicLocs[lane],
                                              normal_loc, false) ||
                            !this->checkDRF(atomicLocs[lane], normal_loc,
                                            false, lane)) {
                            panic("GpuTh %d - Data race detected. STOPPED!\n",
                                  thread->getGpuThreadId());
                        }
                    }

                    action->setLocation(lane, normal_loc);
                    break;
                case Action::Type::STORE:
                    // pick randomly a normal location
                    normal_loc = addrManager->
                                            getStoreLoc(atomicLocs[lane]);
                    assert(normal_loc >= AddressManager::INVALID_LOCATION);

                    if (normal_loc != AddressManager::INVALID_LOCATION) {
                        // check DRF against other threads and prior lanes
                        if (!tester->checkDRF(atomicLocs[lane],
                                              normal_loc, true) ||
                            !this->checkDRF(atomicLocs[lane], normal_loc,
                                            true, lane)) {
                            panic("GpuTh %d - Data race detected. STOPPED!\n",
                                  thread->getGpuThreadId());
                        }
                    }

                    action->setLocation(lane, normal_loc);
                    break;
                case Action::Type::ACQUIRE:
                case Action::Type::RELEASE:
                    // fences have no memory location
                    break;
                default:
                    panic("Invalid action type\n");
            }
        }

        // tell the address manager this lane's selection is complete
        addrManager->finishLocSelection(atomicLocs[lane]);
    }
}
|
||||
|
||||
// Finish the episode: for each lane, collect the set of distinct normal
// locations its LOAD/STORE actions used and release each exactly once
// back to the address manager (paired with the lane's atomic location),
// then mark the episode inactive.
void
Episode::completeEpisode()
{
    // release all locations this episode has picked and used
    Location atomic_loc, normal_loc;
    for (int lane = 0; lane < numLanes; ++lane) {
        atomic_loc = AddressManager::INVALID_LOCATION;
        normal_loc = AddressManager::INVALID_LOCATION;

        // dedup set: a location touched by several actions in this lane
        // must still be released only once
        std::unordered_set<Location> unique_loc_set;

        for (Action* action : actions) {
            assert(action);

            if (action->isAtomicAction()) {
                if (atomic_loc == AddressManager::INVALID_LOCATION) {
                    atomic_loc = action->getLocation(lane);
                } else {
                    // both atomic ops in the same lane must be
                    // at the same location
                    assert(atomic_loc == action->getLocation(lane));
                }
            } else if (!action->isMemFenceAction()) {
                // LOAD/STORE: record its location if it is valid
                assert(atomic_loc >= 0);
                normal_loc = action->getLocation(lane);

                if (normal_loc >= 0)
                    unique_loc_set.insert(normal_loc);
            }
        }

        // each unique loc can be released only once
        for (Location loc : unique_loc_set)
            addrManager->releaseLocation(atomic_loc, loc);
    }

    // this episode is no longer active
    isActive = false;
}
|
||||
|
||||
// Check whether a proposed (atomic_loc, loc) access would create a data
// race with this episode's first max_lane lanes. A race exists when a
// lane shares the same atomic location and:
//   - the new access is a store and any LOAD/STORE in that lane touches
//     loc, or
//   - the new access is a load and a STORE in that lane touches loc.
// Returns true if the access is data-race-free.
bool
Episode::checkDRF(Location atomic_loc, Location loc, bool isStore,
                  int max_lane) const
{
    assert(atomic_loc != AddressManager::INVALID_LOCATION);
    assert(loc != AddressManager::INVALID_LOCATION);
    assert(max_lane <= numLanes);

    for (int lane = 0; lane < max_lane; ++lane) {
        // only lanes synchronizing on the same atomic location can race
        if (atomic_loc == atomicLocs[lane]) {
            for (const Action* action : actions) {
                // fences and atomics are excluded from the race check
                if (!action->isAtomicAction() &&
                    !action->isMemFenceAction()) {
                    if (isStore && loc == action->getLocation(lane)) {
                        warn("ST at location %d races against thread %d\n",
                             loc, thread->getGpuThreadId());
                        return false;
                    } else if (!isStore &&
                               action->getType() == Action::Type::STORE &&
                               loc == action->getLocation(lane)) {
                        warn("LD at location %d races against thread %d\n",
                             loc, thread->getGpuThreadId());
                        return false;
                    }
                }
            }
        }
    }

    return true;
}
|
||||
|
||||
// -------------------- Action class ----------------------------
|
||||
// Construct an action of type t spanning num_lanes lanes. Every lane's
// location starts out INVALID until setLocation() assigns it.
Episode::Action::Action(Type t, int num_lanes)
    : type(t),
      numLanes(num_lanes)
{
    assert(numLanes > 0);
    // fill-resize replaces the original resize + manual assignment loop:
    // one call initializes every slot to INVALID_LOCATION
    locations.resize(numLanes, AddressManager::INVALID_LOCATION);
}
|
||||
|
||||
void
|
||||
Episode::Action::setLocation(int lane, Location loc)
|
||||
{
|
||||
assert(lane >= 0 && lane < numLanes);
|
||||
locations[lane] = loc;
|
||||
}
|
||||
|
||||
// Return the memory location bound to this action for the given lane.
AddressManager::Location
Episode::Action::getLocation(int lane) const
{
    assert(lane >= 0);
    assert(lane < numLanes);
    return locations[lane];
}
|
||||
|
||||
bool
|
||||
Episode::Action::isAtomicAction() const
|
||||
{
|
||||
return (type == Type::ATOMIC);
|
||||
}
|
||||
|
||||
bool
|
||||
Episode::Action::isMemFenceAction() const
|
||||
{
|
||||
return (type == Type::ACQUIRE || type == Type::RELEASE);
|
||||
}
|
||||
|
||||
const std::string
|
||||
Episode::Action::printType() const
|
||||
{
|
||||
if (type == Type::ACQUIRE)
|
||||
return "ACQUIRE";
|
||||
else if (type == Type::RELEASE)
|
||||
return "RELEASE";
|
||||
else if (type == Type::ATOMIC)
|
||||
return "ATOMIC";
|
||||
else if (type == Type::LOAD)
|
||||
return "LOAD";
|
||||
else if (type == Type::STORE)
|
||||
return "STORE";
|
||||
else
|
||||
panic("Invalid action type\n");
|
||||
}
|
||||
126
src/cpu/testers/gpu_ruby_test/episode.hh
Normal file
126
src/cpu/testers/gpu_ruby_test/episode.hh
Normal file
@@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_
|
||||
#define CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
|
||||
|
||||
class ProtocolTester;
|
||||
class GpuThread;
|
||||
|
||||
// One episode of the data-race-free tester: a fixed-length, randomly
// ordered sequence of actions (ATOMIC/ACQUIRE/LD-ST mix/RELEASE/ATOMIC)
// executed by a single thread. The episode owns its Action objects and
// the per-lane atomic-location choices used for DRF checking.
class Episode
{
  public:
    typedef AddressManager::Location Location;
    typedef AddressManager::Value Value;

    // A single step of an episode: its type plus one memory location
    // per lane (INVALID for fence actions).
    class Action {
      public:
        enum class Type {
            ACQUIRE,
            RELEASE,
            ATOMIC,
            LOAD,
            STORE,
        };

        Action(Type t, int num_lanes);
        ~Action() {}

        Type getType() const { return type; }
        void setLocation(int lane, Location loc);
        Location getLocation(int lane) const;
        bool isAtomicAction() const;
        bool isMemFenceAction() const;
        // human-readable type name, for logging
        const std::string printType() const;

      private:
        Type type;
        int numLanes;
        typedef std::vector<Location> LocationList;
        // per-lane locations, indexed by lane id
        LocationList locations;
    };

    Episode(ProtocolTester* tester, GpuThread* thread, int num_loads,
            int num_stores);
    ~Episode();

    // return episode id
    int getEpisodeId() const { return episodeId; }
    // return the action at the head of the action queue
    const Action* peekCurAction() const;
    // pop the action at the head of the action queue
    void popAction();
    // check if there is more action to be issued in this episode
    bool hasMoreActions() const { return nextActionIdx < actions.size();}
    // complete this episode by releasing all locations & updating st effects
    void completeEpisode();
    // check if this episode is executing
    bool isEpsActive() const { return isActive; }
    // check if the input episode and this one have any data race
    bool checkDRF(Location atomic_loc, Location loc, bool isStore,
                  int max_lane) const;

  private:
    // pointers to tester, thread and address manager structures
    ProtocolTester *tester;
    GpuThread *thread;
    AddressManager *addrManager;

    // a unique episode id
    int episodeId;
    // list of actions in this episode (owned; freed in the destructor)
    typedef std::vector<Action*> ActionList;
    ActionList actions;
    // list of atomic locations picked for this episode, one per lane
    typedef std::vector<Location> AtomicLocationList;
    AtomicLocationList atomicLocs;

    // is a thread running this episode?
    bool isActive;
    // episode length = num_loads + num_stores
    int numLoads;
    int numStores;
    // index of the next action in actions
    int nextActionIdx;
    // number of lanes in this thread
    int numLanes;

    // randomly generate actions in this episode
    void initActions();
};
|
||||
|
||||
#endif /* CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_ */
|
||||
430
src/cpu/testers/gpu_ruby_test/gpu_thread.cc
Normal file
430
src/cpu/testers/gpu_ruby_test/gpu_thread.cc
Normal file
@@ -0,0 +1,430 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "debug/ProtocolTest.hh"
|
||||
|
||||
// Base tester-thread constructor: caches the SimObject params and
// zero-initializes all per-thread state. Tester/port/address-manager
// pointers stay null until attachGpuThreadToPorts() wires them up.
GpuThread::GpuThread(const Params &p)
      : ClockedObject(p),
        threadEvent(this, "GpuThread tick"),
        deadlockCheckEvent(this),
        threadId(p.thread_id),
        numLanes(p.num_lanes),
        tester(nullptr), addrManager(nullptr), port(nullptr),
        scalarPort(nullptr), sqcPort(nullptr), curEpisode(nullptr),
        curAction(nullptr), pendingLdStCount(0), pendingFenceCount(0),
        pendingAtomicCount(0), lastActiveCycle(Cycles(0)),
        deadlockThreshold(p.deadlock_threshold)
{
}
|
||||
|
||||
// Free every episode this thread ever created; episodeHistory owns them
// (issueNewEpisode() allocates each one with new).
GpuThread::~GpuThread()
{
    for (auto it = episodeHistory.begin(); it != episodeHistory.end(); ++it) {
        assert(*it != nullptr);
        delete *it;
    }
}
|
||||
|
||||
// Main per-thread tick. Triggered either by a hitCallback (a response
// arrived) or by a freshly created episode. Issues the next ready
// action, or — when the current episode has drained — completes it and
// starts a new one (unless the tester says it is time to exit).
void
GpuThread::wakeup()
{
    // this thread is waken up by one of the following events
    //  - hitCallback is called
    //  - a new episode is created

    // check if this is the first episode in this thread
    if (curEpisode == nullptr) {
        issueNewEpisode();
        assert(curEpisode);
    }

    if (isNextActionReady()) {
        // isNextActionReady should check if the action list is empty
        assert(curAction != nullptr);

        // issue the next action
        issueNextAction();
    } else {
        // check for completion of the current episode
        // completion = no outstanding requests + not having more actions
        if (!curEpisode->hasMoreActions() &&
            pendingLdStCount == 0 &&
            pendingFenceCount == 0 &&
            pendingAtomicCount == 0) {

            curEpisode->completeEpisode();

            // check if it's time to stop the tester
            if (tester->checkExit()) {
                // no more event is scheduled for this thread
                return;
            }

            // issue the next episode
            issueNewEpisode();
            assert(curEpisode);

            // now we get a new episode
            // let's wake up the thread in the next cycle
            if (!threadEvent.scheduled()) {
                scheduleWakeup();
            }
        }
    }
}
|
||||
|
||||
// Schedule this thread's tick event for the next cycle. Callers must
// ensure the event is not already scheduled (asserted here).
void
GpuThread::scheduleWakeup()
{
    assert(!threadEvent.scheduled());
    schedule(threadEvent, nextCycle());
}
|
||||
|
||||
// Kick off the periodic deadlock-check event. Called once at start-up;
// the event re-schedules itself thereafter (see checkDeadlock).
void
GpuThread::scheduleDeadlockCheckEvent()
{
    // after this first schedule, the deadlock event is scheduled by itself
    assert(!deadlockCheckEvent.scheduled());
    schedule(deadlockCheckEvent, nextCycle());
}
|
||||
|
||||
void
|
||||
GpuThread::attachGpuThreadToPorts(ProtocolTester *_tester,
|
||||
ProtocolTester::SeqPort *_port,
|
||||
ProtocolTester::SeqPort *_scalarPort,
|
||||
ProtocolTester::SeqPort *_sqcPort)
|
||||
{
|
||||
tester = _tester;
|
||||
port = _port;
|
||||
scalarPort = _scalarPort;
|
||||
sqcPort = _sqcPort;
|
||||
|
||||
assert(tester && port);
|
||||
addrManager = tester->getAddressManager();
|
||||
assert(addrManager);
|
||||
}
|
||||
|
||||
void
|
||||
GpuThread::issueNewEpisode()
|
||||
{
|
||||
int num_reg_loads = random() % tester->getEpisodeLength();
|
||||
int num_reg_stores = tester->getEpisodeLength() - num_reg_loads;
|
||||
|
||||
// create a new episode
|
||||
curEpisode = new Episode(tester, this, num_reg_loads, num_reg_stores);
|
||||
episodeHistory.push_back(curEpisode);
|
||||
}
|
||||
|
||||
// Decide whether the head-of-queue action of the current episode may be
// issued now, given the thread's outstanding requests. As a side
// effect, sets curAction to that head action. Readiness rules:
//   ATOMIC  — all prior requests (ld/st, fence, atomic) must be done
//   ACQUIRE — all prior atomics must be done (no ld/st/fence may exist)
//   RELEASE — all prior ld/st must be done (no atomic/fence may exist)
//   LOAD/STORE — no pending fence, and no overlap with any address in
//                the outstanding load/store tables
bool
GpuThread::isNextActionReady()
{
    if (!curEpisode->hasMoreActions()) {
        return false;
    } else {
        curAction = curEpisode->peekCurAction();

        switch(curAction->getType()) {
            case Episode::Action::Type::ATOMIC:
                // an atomic action must wait for all previous requests
                // to complete
                if (pendingLdStCount == 0 &&
                    pendingFenceCount == 0 &&
                    pendingAtomicCount == 0) {
                    return true;
                }

                return false;
            case Episode::Action::Type::ACQUIRE:
                // we should not see any outstanding ld_st or fence here
                assert(pendingLdStCount == 0 &&
                       pendingFenceCount == 0);

                // an acquire action must wait for all previous atomic
                // requests to complete
                if (pendingAtomicCount == 0) {
                    return true;
                }

                return false;
            case Episode::Action::Type::RELEASE:
                // we should not see any outstanding atomic or fence here
                assert(pendingAtomicCount == 0 &&
                       pendingFenceCount == 0);

                // a release action must wait for all previous ld/st
                // requests to complete
                if (pendingLdStCount == 0) {
                    return true;
                }

                return false;
            case Episode::Action::Type::LOAD:
            case Episode::Action::Type::STORE:
                // we should not see any outstanding atomic here
                assert(pendingAtomicCount == 0);

                // can't issue if there is a pending fence
                if (pendingFenceCount > 0) {
                    return false;
                }

                // a Load or Store is ready if it doesn't overlap
                // with any outstanding request
                for (int lane = 0; lane < numLanes; ++lane) {
                    Location loc = curAction->getLocation(lane);

                    if (loc != AddressManager::INVALID_LOCATION) {
                        Addr addr = addrManager->getAddress(loc);

                        if (outstandingLoads.find(addr) !=
                            outstandingLoads.end()) {
                            return false;
                        }

                        if (outstandingStores.find(addr) !=
                            outstandingStores.end()) {
                            return false;
                        }

                        if (outstandingAtomics.find(addr) !=
                            outstandingAtomics.end()) {
                            // this is not an atomic action, so the address
                            // should not be in outstandingAtomics list
                            assert(false);
                        }
                    }
                }

                return true;
            default:
                panic("The tester got an invalid action\n");
        }
    }
}
|
||||
|
||||
// Dispatch the current (ready) action to the matching issue hook, pop
// it from the episode, refresh the deadlock timestamp, and schedule the
// next wakeup so the following action can be considered immediately.
void
GpuThread::issueNextAction()
{
    switch(curAction->getType()) {
        case Episode::Action::Type::ATOMIC:
            issueAtomicOps();
            break;
        case Episode::Action::Type::ACQUIRE:
            issueAcquireOp();
            break;
        case Episode::Action::Type::RELEASE:
            issueReleaseOp();
            break;
        case Episode::Action::Type::LOAD:
            issueLoadOps();
            break;
        case Episode::Action::Type::STORE:
            issueStoreOps();
            break;
        default:
            panic("The tester got an invalid action\n");
    }

    // the current action has been issued, pop it from the action list
    curEpisode->popAction();
    // record activity so the deadlock checker sees forward progress
    lastActiveCycle = curCycle();

    // we may be able to schedule the next action
    // just wake up this thread in the next cycle
    if (!threadEvent.scheduled()) {
        scheduleWakeup();
    }
}
|
||||
|
||||
void
|
||||
GpuThread::addOutstandingReqs(OutstandingReqTable& req_table, Addr address,
|
||||
int lane, Location loc, Value stored_val)
|
||||
{
|
||||
OutstandingReqTable::iterator it = req_table.find(address);
|
||||
OutstandingReq req(lane, loc, stored_val, curCycle());
|
||||
|
||||
if (it == req_table.end()) {
|
||||
// insert a new list of requests for this address
|
||||
req_table.insert(std::pair<Addr, OutstandingReqList>(address,
|
||||
OutstandingReqList(1, req)));
|
||||
} else {
|
||||
// add a new request
|
||||
(it->second).push_back(req);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove and return one outstanding request recorded for addr. The
// table must contain at least one request for addr; when the last
// request for that address is removed, the address entry is erased.
GpuThread::OutstandingReq
GpuThread::popOutstandingReq(OutstandingReqTable& req_table, Addr addr)
{
    auto entry = req_table.find(addr);

    // there must be exactly one list of requests for this address in the table
    assert(entry != req_table.end());

    OutstandingReqList& pending = entry->second;
    assert(!pending.empty());

    // take the most recently added request
    OutstandingReq popped = pending.back();
    pending.pop_back();

    // drop the (now empty) per-address list from the table
    if (pending.empty())
        req_table.erase(entry);

    return popped;
}
|
||||
|
||||
// Verify the value returned by a completed atomic op against the
// address manager's expected value. On mismatch, log the details and
// hand the error report to the tester (which dumps it and exits).
void
GpuThread::validateAtomicResp(Location loc, int lane, Value ret_val)
{
    if (!addrManager->validateAtomicResp(loc, ret_val)) {
        std::stringstream ss;
        Addr addr = addrManager->getAddress(loc);

        // basic info
        ss << threadName << ": Atomic Op returned unexpected value\n"
           << "\tEpisode " << curEpisode->getEpisodeId() << "\n"
           << "\tLane ID " << lane << "\n"
           << "\tAddress " << printAddress(addr) << "\n"
           << "\tAtomic Op's return value " << ret_val << "\n";

        // print out basic info
        warn("%s\n", ss.str());

        // TODO add more detailed info

        // dump all error info and exit the simulation
        tester->dumpErrorLog(ss);
    }
}
|
||||
|
||||
void
|
||||
GpuThread::validateLoadResp(Location loc, int lane, Value ret_val)
|
||||
{
|
||||
if (ret_val != addrManager->getLoggedValue(loc)) {
|
||||
std::stringstream ss;
|
||||
Addr addr = addrManager->getAddress(loc);
|
||||
|
||||
// basic info
|
||||
ss << threadName << ": Loaded value is not consistent with "
|
||||
<< "the last stored value\n"
|
||||
<< "\tGpuThread " << threadId << "\n"
|
||||
<< "\tEpisode " << curEpisode->getEpisodeId() << "\n"
|
||||
<< "\tLane ID " << lane << "\n"
|
||||
<< "\tAddress " << printAddress(addr) << "\n"
|
||||
<< "\tLoaded value " << ret_val << "\n"
|
||||
<< "\tLast writer " << addrManager->printLastWriter(loc) << "\n";
|
||||
|
||||
// print out basic info
|
||||
warn("%s\n", ss.str());
|
||||
|
||||
// TODO add more detailed info
|
||||
|
||||
// dump all error info and exit the simulation
|
||||
tester->dumpErrorLog(ss);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
GpuThread::checkDRF(Location atomic_loc, Location loc, bool isStore) const
|
||||
{
|
||||
if (curEpisode && curEpisode->isEpsActive()) {
|
||||
// check against the current episode this thread is executing
|
||||
return curEpisode->checkDRF(atomic_loc, loc, isStore, numLanes);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
GpuThread::checkDeadlock()
|
||||
{
|
||||
if ((curCycle() - lastActiveCycle) > deadlockThreshold) {
|
||||
// deadlock detected
|
||||
std::stringstream ss;
|
||||
|
||||
ss << threadName << ": Deadlock detected\n"
|
||||
<< "\tLast active cycle: " << lastActiveCycle << "\n"
|
||||
<< "\tCurrent cycle: " << curCycle() << "\n"
|
||||
<< "\tDeadlock threshold: " << deadlockThreshold << "\n";
|
||||
|
||||
// print out basic info
|
||||
warn("%s\n", ss.str());
|
||||
|
||||
// dump all error info and exit the simulation
|
||||
tester->dumpErrorLog(ss);
|
||||
} else if (!tester->checkExit()) {
|
||||
// schedule a future deadlock check event
|
||||
assert(!deadlockCheckEvent.scheduled());
|
||||
schedule(deadlockCheckEvent,
|
||||
deadlockThreshold * clockPeriod() + curTick());
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
GpuThread::printOutstandingReqs(const OutstandingReqTable& table,
|
||||
std::stringstream& ss) const
|
||||
{
|
||||
Cycles cur_cycle = curCycle();
|
||||
|
||||
for (const auto& m : table) {
|
||||
for (const auto& req : m.second) {
|
||||
ss << "\t\t\tAddr " << printAddress(m.first)
|
||||
<< ": delta (curCycle - issueCycle) = "
|
||||
<< (cur_cycle - req.issueCycle) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Dump every category of in-flight request (loads, stores, atomics) plus
// the count of pending acquire/release fences to ss. Used when reporting
// errors or deadlocks.
void
GpuThread::printAllOutstandingReqs(std::stringstream& ss) const
{
    // dump all outstanding requests of this thread
    ss << "\t\tOutstanding Loads:\n";
    printOutstandingReqs(outstandingLoads, ss);
    ss << "\t\tOutstanding Stores:\n";
    printOutstandingReqs(outstandingStores, ss);
    ss << "\t\tOutstanding Atomics:\n";
    printOutstandingReqs(outstandingAtomics, ss);
    // fences are tracked by a plain counter, not a per-address table
    ss << "\t\tNumber of outstanding acquires & releases: "
       << pendingFenceCount << std::endl;
}
|
||||
199
src/cpu/testers/gpu_ruby_test/gpu_thread.hh
Normal file
199
src/cpu/testers/gpu_ruby_test/gpu_thread.hh
Normal file
@@ -0,0 +1,199 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* GPU thread issues requests to and receives responses from Ruby memory
|
||||
*/
|
||||
|
||||
#ifndef CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_
|
||||
#define CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
|
||||
#include "cpu/testers/gpu_ruby_test/episode.hh"
|
||||
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
|
||||
#include "gpu-compute/gpu_dyn_inst.hh"
|
||||
#include "sim/clocked_object.hh"
|
||||
|
||||
/**
 * Abstract base for one tester "thread" that issues memory requests to and
 * receives responses from Ruby. A thread may be a 1-lane CPU thread or a
 * multi-lane GPU wavefront; subclasses implement the actual issue logic and
 * the response callback.
 */
class GpuThread : public ClockedObject
{
  public:
    typedef GpuThreadParams Params;
    GpuThread(const Params &p);
    virtual ~GpuThread();

    typedef AddressManager::Location Location;
    typedef AddressManager::Value Value;

    // main tick: advance/issue work for this thread
    void wakeup();
    void scheduleWakeup();
    // watchdog that reports an error if the thread stays idle too long
    void checkDeadlock();
    void scheduleDeadlockCheckEvent();

    // Hook this thread up to the tester and its sequencer ports. The SQC
    // and scalar ports are optional (nullptr for CPU threads).
    void attachGpuThreadToPorts(ProtocolTester *_tester,
                                ProtocolTester::SeqPort *_port,
                                ProtocolTester::SeqPort *_sqcPort = nullptr,
                                ProtocolTester::SeqPort *_scalarPort = nullptr);

    const std::string& getName() const { return threadName; }

    // response handler; must be implemented by a child class
    virtual void hitCallback(PacketPtr pkt) = 0;

    int getGpuThreadId() const { return threadId; }
    int getNumLanes() const { return numLanes; }
    // check if the input location would satisfy DRF constraint
    bool checkDRF(Location atomic_loc, Location loc, bool isStore) const;

    // dump all in-flight requests of this thread (for error reports)
    void printAllOutstandingReqs(std::stringstream& ss) const;

  protected:
    // Event that drives this thread's wakeup() ticks.
    class GpuThreadEvent : public Event
    {
      private:
        GpuThread* thread;
        std::string desc;

      public:
        GpuThreadEvent(GpuThread* _thread, std::string _description)
            : Event(CPU_Tick_Pri), thread(_thread), desc(_description)
        {}
        void setDesc(std::string _description) { desc = _description; }
        void process() { thread->wakeup(); }
        const std::string name() { return desc; }
    };

    GpuThreadEvent threadEvent;

    // Event that periodically runs checkDeadlock().
    class DeadlockCheckEvent : public Event
    {
      private:
        GpuThread* thread;

      public:
        DeadlockCheckEvent(GpuThread* _thread)
            : Event(CPU_Tick_Pri), thread(_thread)
        {}
        void process() { thread->checkDeadlock(); }
        const std::string name() const { return "Tester deadlock check"; }
    };

    DeadlockCheckEvent deadlockCheckEvent;

    // Bookkeeping for one issued-but-unanswered memory request.
    struct OutstandingReq
    {
        int lane;           // lane that issued the request
        Location origLoc;   // tester-level location targeted
        Value storedValue;  // value written (INVALID_VALUE for loads)
        Cycles issueCycle;  // when the request was issued (for aging)

        OutstandingReq(int _lane, Location _loc, Value _val, Cycles _cycle)
            : lane(_lane), origLoc(_loc), storedValue(_val), issueCycle(_cycle)
        {}

        ~OutstandingReq()
        {}
    };

    // the unique global id of this thread
    int threadId;
    // width of this thread (1 for cpu thread & wf size for gpu wavefront)
    int numLanes;
    // thread name
    std::string threadName;
    // pointer to the main tester
    ProtocolTester *tester;
    // pointer to the address manager
    AddressManager *addrManager;

    ProtocolTester::SeqPort *port; // main data port (GPU-vector data)
    ProtocolTester::SeqPort *scalarPort; // nullptr for CPU
    ProtocolTester::SeqPort *sqcPort; // nullptr for CPU

    // a list of issued episodes sorted by time
    // the last episode in the list is the current episode
    typedef std::vector<Episode*> EpisodeHistory;
    EpisodeHistory episodeHistory;
    // pointer to the current episode
    Episode *curEpisode;
    // pointer to the current action
    const Episode::Action *curAction;

    // number of outstanding requests that are waiting for their responses
    int pendingLdStCount;
    int pendingFenceCount;
    int pendingAtomicCount;

    // last cycle when there is an event in this thread
    Cycles lastActiveCycle;
    Cycles deadlockThreshold;

    // a per-address list of outstanding requests
    typedef std::vector<OutstandingReq> OutstandingReqList;
    typedef std::unordered_map<Addr, OutstandingReqList> OutstandingReqTable;
    OutstandingReqTable outstandingLoads;
    OutstandingReqTable outstandingStores;
    OutstandingReqTable outstandingAtomics;

    // start the next episode of actions for this thread
    void issueNewEpisode();
    // check if the next action in the current episode satisfies all wait_cnt
    // constraints and is ready to issue
    bool isNextActionReady();
    void issueNextAction();

    // issue Ops to Ruby memory
    // must be implemented by a child class
    virtual void issueLoadOps() = 0;
    virtual void issueStoreOps() = 0;
    virtual void issueAtomicOps() = 0;
    virtual void issueAcquireOp() = 0;
    virtual void issueReleaseOp() = 0;

    // add an outstanding request to its corresponding table
    void addOutstandingReqs(OutstandingReqTable& req_table, Addr addr,
                            int lane, Location loc,
                            Value stored_val = AddressManager::INVALID_VALUE);

    // pop an outstanding request from the input table
    OutstandingReq popOutstandingReq(OutstandingReqTable& req_table,
                                     Addr address);

    // validate all atomic responses
    void validateAtomicResp(Location loc, int lane, Value ret_val);
    // validate all Load responses
    void validateLoadResp(Location loc, int lane, Value ret_val);

    // dump one request table's contents into ss (helper for error reports)
    void printOutstandingReqs(const OutstandingReqTable& table,
                              std::stringstream& ss) const;
};
|
||||
|
||||
#endif /* CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_ */
|
||||
377
src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc
Normal file
377
src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc
Normal file
@@ -0,0 +1,377 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
|
||||
|
||||
#include "debug/ProtocolTest.hh"
|
||||
|
||||
GpuWavefront::GpuWavefront(const Params &p)
    : GpuThread(p), cuId(p.cu_id)
{
    // human-readable identity used in debug and error messages
    threadName = "GpuWavefront(GpuThread ID = " + std::to_string(threadId) +
                 ", CU ID = " + std::to_string(cuId) + ")";
    threadEvent.setDesc("GpuWavefront tick");
}
|
||||
|
||||
GpuWavefront::~GpuWavefront()
{
    // nothing wavefront-specific to release; the base class owns the rest
}
|
||||
|
||||
// gem5 param-object factory: construct the SimObject from its generated
// params struct.
GpuWavefront*
GpuWavefrontParams::create() const
{
    return new GpuWavefront(*this);
}
|
||||
|
||||
// Issue one load per active lane for the current LOAD action. Each
// response is later matched against the per-address outstanding-load table
// and validated in hitCallback().
void
GpuWavefront::issueLoadOps()
{
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::LOAD);
    // we should not have any outstanding fence or atomic op at this point
    assert(pendingFenceCount == 0);
    assert(pendingAtomicCount == 0);

    for (int lane = 0; lane < numLanes; ++lane) {
        Location location = curAction->getLocation(lane);
        assert(location >= AddressManager::INVALID_LOCATION);

        // Make a request if we do not get an INVALID_LOCATION for this lane.
        if (location >= 0) {
            Addr address = addrManager->getAddress(location);
            DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
                    this->getName(), curEpisode->getEpisodeId(),
                    printAddress(address));

            int load_size = sizeof(Value);

            // for now, assert address is 4-byte aligned
            assert(address % load_size == 0);

            auto req = std::make_shared<Request>(address, load_size,
                                                 0, tester->requestorId(),
                                                 0, threadId, nullptr);
            req->setPaddr(address);
            req->setReqInstSeqNum(tester->getActionSeqNum());
            // set protocol-specific flags
            setExtraRequestFlags(req);

            // packet takes ownership of the data buffer via dataDynamic
            PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
            uint8_t* data = new uint8_t[load_size];
            pkt->dataDynamic(data);
            pkt->senderState = new ProtocolTester::SenderState(this);

            // increment the number of outstanding ld_st requests
            pendingLdStCount++;

            if (!port->sendTimingReq(pkt)) {
                panic("Not expected failed sendTimingReq\n");
            }

            // insert an outstanding load
            addOutstandingReqs(outstandingLoads, address, lane, location);
        }
    }
}
|
||||
|
||||
void
|
||||
GpuWavefront::issueStoreOps()
|
||||
{
|
||||
assert(curAction);
|
||||
assert(curAction->getType() == Episode::Action::Type::STORE);
|
||||
// we should not have any outstanding fence or atomic op at this point
|
||||
assert(pendingFenceCount == 0);
|
||||
assert(pendingAtomicCount == 0);
|
||||
|
||||
for (int lane = 0; lane < numLanes; ++lane) {
|
||||
Location location = curAction->getLocation(lane);
|
||||
assert(location >= AddressManager::INVALID_LOCATION);
|
||||
|
||||
// Make a request if we do not get an INVALID_LOCATION for this lane.
|
||||
if (location >= 0) {
|
||||
// prepare the next value to store
|
||||
Value new_value = addrManager->getLoggedValue(location) + 1;
|
||||
|
||||
Addr address = addrManager->getAddress(location);
|
||||
// must be aligned with store size
|
||||
assert(address % sizeof(Value) == 0);
|
||||
|
||||
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
|
||||
"Value %d\n", this->getName(),
|
||||
curEpisode->getEpisodeId(), printAddress(address),
|
||||
new_value);
|
||||
|
||||
auto req = std::make_shared<Request>(address, sizeof(Value),
|
||||
0, tester->requestorId(), 0,
|
||||
threadId, nullptr);
|
||||
req->setPaddr(address);
|
||||
req->setReqInstSeqNum(tester->getActionSeqNum());
|
||||
// set protocol-specific flags
|
||||
setExtraRequestFlags(req);
|
||||
|
||||
PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
|
||||
uint8_t *writeData = new uint8_t[sizeof(Value)];
|
||||
for (int j = 0; j < sizeof(Value); ++j) {
|
||||
writeData[j] = ((uint8_t*)&new_value)[j];
|
||||
}
|
||||
pkt->dataDynamic(writeData);
|
||||
pkt->senderState = new ProtocolTester::SenderState(this);
|
||||
|
||||
// increment the number of outstanding ld_st requests
|
||||
pendingLdStCount++;
|
||||
|
||||
if (!port->sendTimingReq(pkt)) {
|
||||
panic("Not expecting a failed sendTimingReq\n");
|
||||
}
|
||||
|
||||
// add an outstanding store
|
||||
addOutstandingReqs(outstandingStores, address, lane, location,
|
||||
new_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Issue one atomic-increment per lane for the current ATOMIC action. The
// old value returned by each atomic is validated against the address
// manager's log in hitCallback().
void
GpuWavefront::issueAtomicOps()
{
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::ATOMIC);
    // we should not have any outstanding ops at this point
    assert(pendingFenceCount == 0);
    assert(pendingLdStCount == 0);
    assert(pendingAtomicCount == 0);

    // we use atomic_inc in the tester; ATOMIC_RETURN_OP makes the old
    // value come back so it can be validated
    Request::Flags flags = Request::ATOMIC_RETURN_OP;

    for (int lane = 0; lane < numLanes; ++lane) {
        Location location = curAction->getLocation(lane);
        // unlike loads/stores, atomics always target a valid location
        assert(location >= 0);

        Addr address = addrManager->getAddress(location);

        DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n",
                this->getName(), curEpisode->getEpisodeId(),
                printAddress(address));

        // must be aligned with store size
        assert(address % sizeof(Value) == 0);
        // ownership of the functor transfers to the request via
        // AtomicOpFunctorPtr
        AtomicOpFunctor *amo_op = new AtomicOpInc<Value>();
        auto req = std::make_shared<Request>(address, sizeof(Value),
                                             flags, tester->requestorId(),
                                             0, threadId,
                                             AtomicOpFunctorPtr(amo_op));
        req->setPaddr(address);
        req->setReqInstSeqNum(tester->getActionSeqNum());
        // set protocol-specific flags
        setExtraRequestFlags(req);

        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
        uint8_t* data = new uint8_t[sizeof(Value)];
        pkt->dataDynamic(data);
        pkt->senderState = new ProtocolTester::SenderState(this);

        if (!port->sendTimingReq(pkt)) {
            panic("Not expecting failed sendTimingReq\n");
        }

        // increment the number of outstanding atomic ops
        pendingAtomicCount++;

        // add an outstanding atomic
        addOutstandingReqs(outstandingAtomics, address, lane, location);
    }
}
|
||||
|
||||
// Issue an acquire fence, modeled as a zero-size memory-sync request at
// paddr 0. Completion is signaled by a MemSyncResp in hitCallback().
void
GpuWavefront::issueAcquireOp()
{
    DPRINTF(ProtocolTest, "%s Episode %d: Issuing Acquire\n", this->getName(),
            curEpisode->getEpisodeId());

    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::ACQUIRE);
    // we should not have any outstanding ops at this point
    assert(pendingFenceCount == 0);
    assert(pendingLdStCount == 0);
    assert(pendingAtomicCount == 0);

    auto acq_req = std::make_shared<Request>(0, 0, 0,
                                             tester->requestorId(), 0,
                                             threadId, nullptr);
    acq_req->setPaddr(0);
    acq_req->setReqInstSeqNum(tester->getActionSeqNum());
    acq_req->setFlags(Request::ACQUIRE);
    // set protocol-specific flags
    setExtraRequestFlags(acq_req);

    PacketPtr pkt = new Packet(acq_req, MemCmd::MemSyncReq);
    pkt->senderState = new ProtocolTester::SenderState(this);

    // increment the number of outstanding fence requests
    pendingFenceCount++;

    if (!port->sendTimingReq(pkt)) {
        panic("Not expecting failed sendTimingReq\n");
    }
}
|
||||
|
||||
void
GpuWavefront::issueReleaseOp()
{
    DPRINTF(ProtocolTest, "%s Episode %d: Issuing Release\n", this->getName(),
            curEpisode->getEpisodeId());

    // A release fence simply waits for all previous stores to complete. All
    // previous loads and stores were done before this release operation is
    // issued, so issueReleaseOp is just a no-op in this tester.

    // we may be able to issue an action. Let's check
    if (!threadEvent.scheduled()) {
        scheduleWakeup();
    }
}
|
||||
|
||||
// Handle a response from Ruby: match it to the outstanding request it
// answers, validate returned data where applicable, update the value log,
// release resources, and wake the thread so the next action can issue.
void
GpuWavefront::hitCallback(PacketPtr pkt)
{
    assert(pkt);
    MemCmd resp_cmd = pkt->cmd;
    // WriteCompleteResp is matched by count only, so use address 0 for it
    Addr addr = (resp_cmd == MemCmd::WriteCompleteResp) ? 0 : pkt->getAddr();

    DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - "
            "Addr %s\n", this->getName(),
            curEpisode->getEpisodeId(), resp_cmd.toString(),
            printAddress(addr));

    // whether the transaction is done after this hitCallback
    bool isTransactionDone = true;

    if (resp_cmd == MemCmd::MemSyncResp) {
        // response to a pending fence
        // no validation needed for fence responses
        assert(pendingFenceCount > 0);
        assert(pendingLdStCount == 0);
        assert(pendingAtomicCount == 0);
        pendingFenceCount--;
    } else if (resp_cmd == MemCmd::ReadResp) {
        // response to a pending read
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);
        assert(outstandingLoads.count(addr) > 0);

        // get return data
        Value value = *(pkt->getPtr<Value>());
        OutstandingReq req = popOutstandingReq(outstandingLoads, addr);
        validateLoadResp(req.origLoc, req.lane, value);

        // this Read is done
        pendingLdStCount--;
    } else if (resp_cmd == MemCmd::WriteResp) {
        // response to a pending write
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);

        // no need to validate Write response
        // just pop it from the outstanding req table so that subsequent
        // requests dependent on this write can proceed
        // note that we don't decrement pendingLdStCount here yet since
        // the write is not yet completed in downstream memory. Instead, we
        // decrement the counter when we receive the write completion ack
        assert(outstandingStores.count(addr) > 0);
        OutstandingReq req = popOutstandingReq(outstandingStores, addr);
        assert(req.storedValue != AddressManager::INVALID_VALUE);

        // update log table
        addrManager->updateLogTable(req.origLoc, threadId,
                                    curEpisode->getEpisodeId(),
                                    req.storedValue,
                                    curTick(),
                                    cuId);

        // the transaction is not done yet. Waiting for write completion ack
        isTransactionDone = false;
    } else if (resp_cmd == MemCmd::SwapResp) {
        // response to a pending atomic
        assert(pendingAtomicCount > 0);
        assert(pendingLdStCount == 0);
        assert(outstandingAtomics.count(addr) > 0);

        // get return data
        Value value = *(pkt->getPtr<Value>());

        // validate atomic op return
        OutstandingReq req = popOutstandingReq(outstandingAtomics, addr);
        validateAtomicResp(req.origLoc, req.lane, value);

        // update log table
        addrManager->updateLogTable(req.origLoc, threadId,
                                    curEpisode->getEpisodeId(), value,
                                    curTick(),
                                    cuId);

        // this Atomic is done
        pendingAtomicCount--;
    } else if (resp_cmd == MemCmd::WriteCompleteResp) {
        // write completion ACK
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);

        // the Write is now done
        pendingLdStCount--;
    } else {
        panic("Unsupported MemCmd response type");
    }

    if (isTransactionDone) {
        // no need to keep senderState and request around
        delete pkt->senderState;
    }

    delete pkt;

    // record the last active cycle to check for deadlock
    lastActiveCycle = curCycle();

    // we may be able to issue an action. Let's check
    if (!threadEvent.scheduled()) {
        scheduleWakeup();
    }
}
|
||||
|
||||
// Hook for protocol-specific request flags; the base wavefront sets none.
// Subclasses targeting a specific protocol may override this (declared
// virtual in the header).
void
GpuWavefront::setExtraRequestFlags(RequestPtr req)
{
    // No extra request flag is set
}
|
||||
68
src/cpu/testers/gpu_ruby_test/gpu_wavefront.hh
Normal file
68
src/cpu/testers/gpu_ruby_test/gpu_wavefront.hh
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_
|
||||
#define CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
|
||||
#include "params/GpuWavefront.hh"
|
||||
#include "sim/clocked_object.hh"
|
||||
|
||||
/**
 * A multi-lane GPU wavefront tester thread. Implements the issue functions
 * and response callback declared pure-virtual in GpuThread, directing
 * vector memory traffic at a compute unit's port.
 */
class GpuWavefront : public GpuThread
{
  public:
    typedef GpuWavefrontParams Params;
    GpuWavefront(const Params &p);
    virtual ~GpuWavefront();

    typedef AddressManager::Location Location;
    typedef AddressManager::Value Value;

    // handle a Ruby response: validate data, update logs, free the packet
    virtual void hitCallback(PacketPtr pkt);

  protected:
    // issue one op per active lane for the current action
    void issueLoadOps();
    void issueStoreOps();
    void issueAtomicOps();
    // acquire and release ops are protocol-specific, so their issue functions
    // may be redefined by a child class of GpuWavefront
    virtual void issueAcquireOp();
    virtual void issueReleaseOp();
    // set extra request flags that is specific to a target protocol
    virtual void setExtraRequestFlags(RequestPtr req);

  protected:
    int cuId; // compute unit associated with this wavefront
};
|
||||
|
||||
#endif /* CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_ */
|
||||
312
src/cpu/testers/gpu_ruby_test/protocol_tester.cc
Normal file
312
src/cpu/testers/gpu_ruby_test/protocol_tester.cc
Normal file
@@ -0,0 +1,312 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
|
||||
|
||||
#include <algorithm>
|
||||
#include <ctime>
|
||||
#include <fstream>
|
||||
#include <random>
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/cpu_thread.hh"
|
||||
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
|
||||
#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
|
||||
#include "debug/ProtocolTest.hh"
|
||||
#include "mem/request.hh"
|
||||
#include "sim/sim_exit.hh"
|
||||
#include "sim/system.hh"
|
||||
|
||||
// Construct the tester from its Python-side parameters: create one request
// port per CPU / CU-vector connection (plus shared SQC and scalar ports),
// build the DRF address manager, seed the RNG, and open the log file.
ProtocolTester::ProtocolTester(const Params &p)
      : ClockedObject(p),
        _requestorId(p.system->getRequestorId(this)),
        numCpuPorts(p.port_cpu_ports_connection_count),
        numVectorPorts(p.port_cu_vector_ports_connection_count),
        numSqcPorts(p.port_cu_sqc_ports_connection_count),
        numScalarPorts(p.port_cu_scalar_ports_connection_count),
        numCusPerSqc(p.cus_per_sqc),
        numCusPerScalar(p.cus_per_scalar),
        numWfsPerCu(p.wavefronts_per_cu),
        numWisPerWf(p.workitems_per_wavefront),
        numAtomicLocs(p.num_atomic_locations),
        numNormalLocsPerAtomic(p.num_normal_locs_per_atomic),
        episodeLength(p.episode_length),
        maxNumEpisodes(p.max_num_episodes),
        debugTester(p.debug_tester),
        cpuThreads(p.cpu_threads),
        wfs(p.wavefronts)
{
    int idx = 0;  // global port index, unique across all four port vectors

    numCpus = numCpuPorts;   // 1 cpu port per CPU
    numCus = numVectorPorts; // 1 vector port per CU

    // create all physical cpu's data ports
    for (int i = 0; i < numCpuPorts; ++i) {
        DPRINTF(ProtocolTest, "Creating %s\n",
                csprintf("%s-cpuPort%d", name(), i));
        cpuPorts.push_back(new SeqPort(csprintf("%s-cpuPort%d", name(), i),
                                       this, i, idx));
        idx++;
    }

    // create all physical gpu's data ports
    for (int i = 0; i < numVectorPorts; ++i) {
        DPRINTF(ProtocolTest, "Creating %s\n",
                csprintf("%s-cuVectorPort%d", name(), i));
        cuVectorPorts.push_back(new SeqPort(csprintf("%s-cuVectorPort%d",
                                                     name(), i),
                                            this, i, idx));
        idx++;
    }

    // scalar ports may be shared by multiple CUs (see numCusPerScalar)
    for (int i = 0; i < numScalarPorts; ++i) {
        DPRINTF(ProtocolTest, "Creating %s\n",
                csprintf("%s-cuScalarPort%d", name(), i));
        cuScalarPorts.push_back(new SeqPort(csprintf("%s-cuScalarPort%d",
                                                     name(), i),
                                            this, i, idx));
        idx++;
    }

    // SQC (instruction cache) ports may be shared by multiple CUs
    for (int i = 0; i < numSqcPorts; ++i) {
        DPRINTF(ProtocolTest, "Creating %s\n",
                csprintf("%s-cuSqcPort%d", name(), i));
        cuSqcPorts.push_back(new SeqPort(csprintf("%s-cuSqcPort%d",
                                                  name(), i),
                                         this, i, idx));
        idx++;
    }

    // create an address manager
    addrManager = new AddressManager(numAtomicLocs,
                                     numNormalLocsPerAtomic);
    nextEpisodeId = 0;

    if (!debugTester)
        warn("Data race check is not enabled\n");

    sentExitSignal = false;

    // set random seed number
    // NOTE(review): this seeds the C library's global PRNG; a seed of 0
    // means "use wall-clock time", i.e. a non-reproducible run.
    if (p.random_seed != 0) {
        srand(p.random_seed);
    } else {
        srand(time(NULL));
    }

    actionCount = 0;

    // create a new log file
    logFile = simout.create(p.log_file);
    assert(logFile);

    // print test configs
    std::stringstream ss;
    ss << "GPU Ruby test's configurations" << std::endl
       << "\tNumber of CPUs: " << numCpus << std::endl
       << "\tNumber of CUs: " << numCus << std::endl
       << "\tNumber of wavefronts per CU: " << numWfsPerCu << std::endl
       << "\tWavefront size: " << numWisPerWf << std::endl
       << "\tNumber of atomic locations: " << numAtomicLocs << std::endl
       << "\tNumber of non-atomic locations: "
       << numNormalLocsPerAtomic * numAtomicLocs << std::endl
       << "\tEpisode length: " << episodeLength << std::endl
       << "\tTest length (max number of episodes): " << maxNumEpisodes
       << std::endl
       << "\tRandom seed: " << p.random_seed
       << std::endl;

    ccprintf(*(logFile->stream()), "%s", ss.str());
    logFile->stream()->flush();
}
|
||||
|
||||
// Release every port allocated with new in the constructor, the address
// manager, and close the tester's log file.
ProtocolTester::~ProtocolTester()
{
    for (auto *port : cpuPorts)
        delete port;
    for (auto *port : cuVectorPorts)
        delete port;
    for (auto *port : cuScalarPorts)
        delete port;
    for (auto *port : cuSqcPorts)
        delete port;
    delete addrManager;

    // close the log file
    simout.close(logFile);
}
|
||||
|
||||
void
|
||||
ProtocolTester::init()
|
||||
{
|
||||
DPRINTF(ProtocolTest, "Attach threads to ports\n");
|
||||
|
||||
// connect cpu threads to cpu's ports
|
||||
for (int cpu_id = 0; cpu_id < numCpus; ++cpu_id) {
|
||||
cpuThreads[cpu_id]->attachGpuThreadToPorts(this,
|
||||
static_cast<SeqPort*>(cpuPorts[cpu_id]));
|
||||
cpuThreads[cpu_id]->scheduleWakeup();
|
||||
cpuThreads[cpu_id]->scheduleDeadlockCheckEvent();
|
||||
}
|
||||
|
||||
// connect gpu wavefronts to gpu's ports
|
||||
int wfId = 0;
|
||||
int vectorPortId = 0;
|
||||
int sqcPortId = 0;
|
||||
int scalarPortId = 0;
|
||||
|
||||
for (int cu_id = 0; cu_id < numCus; ++cu_id) {
|
||||
vectorPortId = cu_id;
|
||||
sqcPortId = cu_id/numCusPerSqc;
|
||||
scalarPortId = cu_id/numCusPerScalar;
|
||||
|
||||
for (int i = 0; i < numWfsPerCu; ++i) {
|
||||
wfId = cu_id * numWfsPerCu + i;
|
||||
wfs[wfId]->attachGpuThreadToPorts(this,
|
||||
static_cast<SeqPort*>(cuVectorPorts[vectorPortId]),
|
||||
static_cast<SeqPort*>(cuSqcPorts[sqcPortId]),
|
||||
static_cast<SeqPort*>(cuScalarPorts[scalarPortId]));
|
||||
wfs[wfId]->scheduleWakeup();
|
||||
wfs[wfId]->scheduleDeadlockCheckEvent();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Look up one of the tester's request ports by interface name and index.
// Unrecognized names are delegated to ClockedObject::getPort.
//
// Fix: the original bounds checks used 'idx > numXPorts', which let
// idx == numXPorts slip through and index one element past the end of the
// port vector before the panic could trigger. Valid indices are
// [0, numXPorts), so the checks must be 'idx >= numXPorts'.
Port&
ProtocolTester::getPort(const std::string &if_name, PortID idx)
{
    if (if_name == "cpu_ports") {
        if (idx >= numCpuPorts)
            panic("ProtocolTester: unknown cpu port %d\n", idx);
        return *cpuPorts[idx];
    } else if (if_name == "cu_vector_ports") {
        if (idx >= numVectorPorts)
            panic("ProtocolTester: unknown cu vect port %d\n", idx);
        return *cuVectorPorts[idx];
    } else if (if_name == "cu_sqc_ports") {
        if (idx >= numSqcPorts)
            panic("ProtocolTester: unknown cu sqc port %d\n", idx);
        return *cuSqcPorts[idx];
    } else if (if_name == "cu_scalar_ports") {
        if (idx >= numScalarPorts)
            panic("ProtocolTester: unknown cu scal port %d\n", idx);
        return *cuScalarPorts[idx];
    }

    // pass along to super class
    return ClockedObject::getPort(if_name, idx);
}
|
||||
|
||||
bool
|
||||
ProtocolTester::checkExit()
|
||||
{
|
||||
if (nextEpisodeId > maxNumEpisodes) {
|
||||
if (!sentExitSignal) {
|
||||
// all done
|
||||
inform("Total completed episodes: %d\n", nextEpisodeId - 1);
|
||||
exitSimLoop("GPU Ruby Tester: Passed!");
|
||||
sentExitSignal = true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
ProtocolTester::checkDRF(Location atomic_loc,
|
||||
Location loc, bool isStore) const
|
||||
{
|
||||
if (debugTester) {
|
||||
// go through all active episodes in all threads
|
||||
for (const GpuThread* th : wfs) {
|
||||
if (!th->checkDRF(atomic_loc, loc, isStore))
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const GpuThread* th : cpuThreads) {
|
||||
if (!th->checkDRF(atomic_loc, loc, isStore))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
ProtocolTester::dumpErrorLog(std::stringstream& ss)
|
||||
{
|
||||
if (!sentExitSignal) {
|
||||
// go through all threads and dump their outstanding requests
|
||||
for (auto t : cpuThreads) {
|
||||
t->printAllOutstandingReqs(ss);
|
||||
}
|
||||
|
||||
for (auto t : wfs) {
|
||||
t->printAllOutstandingReqs(ss);
|
||||
}
|
||||
|
||||
// dump error log into a file
|
||||
assert(logFile);
|
||||
ccprintf(*(logFile->stream()), "%s", ss.str());
|
||||
logFile->stream()->flush();
|
||||
|
||||
sentExitSignal = true;
|
||||
// terminate the simulation
|
||||
panic("GPU Ruby Tester: Failed!\n");
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
ProtocolTester::SeqPort::recvTimingResp(PacketPtr pkt)
|
||||
{
|
||||
// get the requesting thread from the original sender state
|
||||
ProtocolTester::SenderState* senderState =
|
||||
safe_cast<ProtocolTester::SenderState*>(pkt->senderState);
|
||||
GpuThread *th = senderState->th;
|
||||
|
||||
th->hitCallback(pkt);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Standard gem5 param-object factory: instantiate the tester from the
// Python-configured parameter struct.
ProtocolTester*
ProtocolTesterParams::create() const
{
    return new ProtocolTester(*this);
}
|
||||
// ==== New file (178 lines): src/cpu/testers/gpu_ruby_test/protocol_tester.hh ====
|
||||
/*
|
||||
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_
|
||||
#define CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_
|
||||
|
||||
/*
|
||||
* The tester includes the main ProtocolTester that manages all ports to the
|
||||
* memory system.
|
||||
* GpuThreads are mapped to certain data port(s)
|
||||
*
|
||||
* GpuThreads inject memory requests through their data ports.
|
||||
* The tester receives and validates responses from the memory.
|
||||
*
|
||||
* Main components
|
||||
* - AddressManager: generate DRF request streams &
|
||||
* validate data response against an internal log_table
|
||||
* - Episode: a sequence of requests
|
||||
* - Thread: either GPU wavefront or CPU thread
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "base/types.hh"
|
||||
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
|
||||
#include "mem/packet.hh"
|
||||
#include "mem/ruby/system/RubyPort.hh"
|
||||
#include "params/ProtocolTester.hh"
|
||||
|
||||
class GpuThread;
|
||||
class CpuThread;
|
||||
class GpuWavefront;
|
||||
|
||||
// Top-level GPU Ruby protocol tester. Owns all request ports into the memory
// system, the CPU threads and GPU wavefronts that drive them, and the
// AddressManager that generates data-race-free request streams and validates
// responses against its log table.
class ProtocolTester : public ClockedObject
{
  public:
    // Request port used for all CPU/CU connections to Ruby; responses are
    // routed back to the issuing thread via the packet's SenderState.
    class SeqPort : public RequestPort
    {
      public:
        // NOTE(review): _index (the tester-global port index) is currently
        // unused by the port itself.
        SeqPort(const std::string &_name, ProtocolTester *_tester, PortID _id,
                PortID _index)
            : RequestPort(_name, _tester, _id)
        {}

      protected:
        virtual bool recvTimingResp(PacketPtr pkt);
        // the tester never expects the memory system to ask for a retry
        virtual void recvReqRetry()
        { panic("%s does not expect a retry\n", name()); }
    };

    // Attached to each outgoing packet so the response can be delivered back
    // to the GpuThread that issued the request.
    struct SenderState : public Packet::SenderState
    {
        GpuThread* th;  // issuing thread; never null (asserted below)
        SenderState(GpuThread* _th)
        {
            assert(_th);
            th = _th;
        }

        ~SenderState()
        {}
    };

  public:
    typedef ProtocolTesterParams Params;
    ProtocolTester(const Params &p);
    ~ProtocolTester();

    typedef AddressManager::Location Location;
    typedef AddressManager::Value Value;

    // attach all threads/wavefronts to their ports and start them
    void init();
    RequestorID requestorId() { return _requestorId; };
    Port& getPort(const std::string &if_name,
                  PortID idx=InvalidPortID) override;

    int getEpisodeLength() const { return episodeLength; }
    // return pointer to the address manager
    AddressManager* getAddressManager() const { return addrManager; }
    // return true if the tester should stop issuing new episodes
    bool checkExit();
    // verify if a location to be picked for LD/ST will satisfy
    // data race free requirement
    bool checkDRF(Location atomic_loc, Location loc, bool isStore) const;
    // return the next episode id and increment it
    int getNextEpisodeID() { return nextEpisodeId++; }
    // get action sequence number
    int getActionSeqNum() { return actionCount++; }

    // dump error log into a file and exit the simulation
    void dumpErrorLog(std::stringstream& ss);

  private:
    RequestorID _requestorId;

    // list of parameters taken from python scripts
    int numCpuPorts;        // one port per CPU
    int numVectorPorts;     // one port per CU
    int numSqcPorts;
    int numScalarPorts;
    int numCusPerSqc;       // CUs sharing one SQC (inst cache) port
    int numCusPerScalar;    // CUs sharing one scalar cache port
    int numWfsPerCu;
    int numWisPerWf;
    // parameters controlling the address range that the tester can access
    int numAtomicLocs;
    int numNormalLocsPerAtomic;
    // the number of actions in an episode (episodeLength +- random number)
    int episodeLength;
    // the maximum number of episodes to be completed by this tester
    int maxNumEpisodes;
    // are we debugging the tester (i.e., is the data-race check enabled)
    bool debugTester;

    // all available requestor ports connected to Ruby
    std::vector<RequestPort*> cpuPorts;      // cpu data ports
    std::vector<RequestPort*> cuVectorPorts; // ports to GPU vector cache
    std::vector<RequestPort*> cuSqcPorts;    // ports to GPU inst cache
    std::vector<RequestPort*> cuScalarPorts; // ports to GPU scalar cache
    // all CPU and GPU threads
    std::vector<CpuThread*> cpuThreads;
    std::vector<GpuWavefront*> wfs;

    // address manager that (1) generates DRF sequences of requests,
    // (2) manages an internal log table and
    // (3) validate response data
    AddressManager* addrManager;

    // number of CPUs and CUs
    int numCpus;
    int numCus;
    // unique id of the next episode
    int nextEpisodeId;

    // global action count. Overflow is fine. It's used to uniquely identify
    // per-wave & per-instruction memory requests in the coalescer
    int actionCount;

    // if an exit signal was already sent
    bool sentExitSignal;

    // tester log file created via simout in the constructor
    OutputStream* logFile;
};
|
||||
|
||||
#endif /* CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_ */
|
||||
Reference in New Issue
Block a user