tests,configs,mem-ruby: Adding Ruby tester for GPU_VIPER
This patch adds the GPU protocol tester that uses data-race-free operation to discover bugs in GPU protocols including GPU_VIPER. For more information please see the following paper and the README: T. Ta, X. Zhang, A. Gutierrez and B. M. Beckmann, "Autonomous Data-Race-Free GPU Testing," 2019 IEEE International Symposium on Workload Characterization (IISWC), Orlando, FL, USA, 2019, pp. 81-92, doi: 10.1109/IISWC47752.2019.9042019. Change-Id: Ic9939d131a930d1e7014ed0290601140bdd1499f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/32855 Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
# Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
|
# Copyright (c) 2018-2020 Advanced Micro Devices, Inc.
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
#
|
#
|
||||||
# For use for simulation and test purposes only
|
# For use for simulation and test purposes only
|
||||||
@@ -43,145 +43,272 @@ addToPath('../')
|
|||||||
from common import Options
|
from common import Options
|
||||||
from ruby import Ruby
|
from ruby import Ruby
|
||||||
|
|
||||||
# Get paths we might need.
|
|
||||||
config_path = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
config_root = os.path.dirname(config_path)
|
|
||||||
m5_root = os.path.dirname(config_root)
|
|
||||||
|
|
||||||
parser = optparse.OptionParser()
|
|
||||||
Options.addNoISAOptions(parser)
|
|
||||||
|
|
||||||
parser.add_option("--maxloads", metavar="N", default=100,
|
|
||||||
help="Stop after N loads")
|
|
||||||
parser.add_option("-f", "--wakeup_freq", metavar="N", default=10,
|
|
||||||
help="Wakeup every N cycles")
|
|
||||||
parser.add_option("-u", "--num-compute-units", type="int", default=1,
|
|
||||||
help="number of compute units in the GPU")
|
|
||||||
parser.add_option("--num-cp", type="int", default=0,
|
|
||||||
help="Number of GPU Command Processors (CP)")
|
|
||||||
# not super important now, but to avoid putting the number 4 everywhere, make
|
|
||||||
# it an option/knob
|
|
||||||
parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs \
|
|
||||||
sharing an SQC (icache, and thus icache TLB)")
|
|
||||||
parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
|
|
||||||
"per CU")
|
|
||||||
parser.add_option("--wf-size", type="int", default=64,
|
|
||||||
help="Wavefront size(in workitems)")
|
|
||||||
parser.add_option("--wfs-per-simd", type="int", default=10, help="Number of " \
|
|
||||||
"WF slots per SIMD")
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Add the ruby specific and protocol specific options
|
# Add the ruby specific and protocol specific options
|
||||||
#
|
#
|
||||||
|
parser = optparse.OptionParser()
|
||||||
|
Options.addNoISAOptions(parser)
|
||||||
Ruby.define_options(parser)
|
Ruby.define_options(parser)
|
||||||
|
|
||||||
exec(compile( \
|
# GPU Ruby tester options
|
||||||
open(os.path.join(config_root, "common", "Options.py")).read(), \
|
parser.add_option("--cache-size", type="choice", default="small",
|
||||||
os.path.join(config_root, "common", "Options.py"), 'exec'))
|
choices=["small", "large"],
|
||||||
|
help="Cache sizes to use. Small encourages races between \
|
||||||
|
requests and writebacks. Large stresses write-through \
|
||||||
|
and/or write-back GPU caches.")
|
||||||
|
parser.add_option("--system-size", type="choice", default="small",
|
||||||
|
choices=["small", "medium", "large"],
|
||||||
|
help="This option defines how many CUs, CPUs and cache \
|
||||||
|
components in the test system.")
|
||||||
|
parser.add_option("--address-range", type="choice", default="small",
|
||||||
|
choices=["small", "large"],
|
||||||
|
help="This option defines the number of atomic \
|
||||||
|
locations that affects the working set's size. \
|
||||||
|
A small number of atomic locations encourage more \
|
||||||
|
races among threads. The large option stresses cache \
|
||||||
|
resources.")
|
||||||
|
parser.add_option("--episode-length", type="choice", default="short",
|
||||||
|
choices=["short", "medium", "long"],
|
||||||
|
help="This option defines the number of LDs and \
|
||||||
|
STs in an episode. The small option encourages races \
|
||||||
|
between the start and end of an episode. The long \
|
||||||
|
option encourages races between LDs and STs in the \
|
||||||
|
same episode.")
|
||||||
|
parser.add_option("--test-length", type="int", default=1,
|
||||||
|
help="The number of episodes to be executed by each \
|
||||||
|
wavefront. This determines the maximum number, i.e., \
|
||||||
|
val X #WFs, of episodes to be executed in the test.")
|
||||||
|
parser.add_option("--debug-tester", action='store_true',
|
||||||
|
help="This option will turn on DRF checker")
|
||||||
|
parser.add_option("--random-seed", type="int", default=0,
|
||||||
|
help="Random seed number. Default value (i.e., 0) means \
|
||||||
|
using runtime-specific value")
|
||||||
|
parser.add_option("--log-file", type="string", default="gpu-ruby-test.log")
|
||||||
|
|
||||||
(options, args) = parser.parse_args()
|
(options, args) = parser.parse_args()
|
||||||
|
|
||||||
#
|
|
||||||
# Set the default cache size and associativity to be very small to encourage
|
|
||||||
# races between requests and writebacks.
|
|
||||||
#
|
|
||||||
options.l1d_size="256B"
|
|
||||||
options.l1i_size="256B"
|
|
||||||
options.l2_size="512B"
|
|
||||||
options.l3_size="1kB"
|
|
||||||
options.l1d_assoc=2
|
|
||||||
options.l1i_assoc=2
|
|
||||||
options.l2_assoc=2
|
|
||||||
options.l3_assoc=2
|
|
||||||
|
|
||||||
# This file can support multiple compute units
|
|
||||||
assert(options.num_compute_units >= 1)
|
|
||||||
n_cu = options.num_compute_units
|
|
||||||
|
|
||||||
options.num_sqc = int((n_cu + options.cu_per_sqc - 1) // options.cu_per_sqc)
|
|
||||||
|
|
||||||
if args:
|
if args:
|
||||||
print("Error: script doesn't take any positional arguments")
|
print("Error: script doesn't take any positional arguments")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
#
|
#
|
||||||
# Create the ruby random tester
|
# Set up cache size - 2 options
|
||||||
|
# 0: small cache
|
||||||
|
# 1: large cache
|
||||||
#
|
#
|
||||||
|
if (options.cache_size == "small"):
|
||||||
# Check to for the GPU_RfO protocol. Other GPU protocols are non-SC and will
|
options.tcp_size="256B"
|
||||||
# not work with the Ruby random tester.
|
options.tcp_assoc=2
|
||||||
assert(buildEnv['PROTOCOL'] == 'GPU_RfO')
|
options.tcc_size="1kB"
|
||||||
|
options.tcc_assoc=2
|
||||||
# The GPU_RfO protocol does not support cache flushes
|
elif (options.cache_size == "large"):
|
||||||
check_flush = False
|
options.tcp_size="256kB"
|
||||||
|
options.tcp_assoc=16
|
||||||
tester = RubyTester(check_flush=check_flush,
|
options.tcc_size="1024kB"
|
||||||
checks_to_complete=options.maxloads,
|
options.tcc_assoc=16
|
||||||
wakeup_frequency=options.wakeup_freq,
|
|
||||||
deadlock_threshold=1000000)
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Create the M5 system. Note that the Memory Object isn't
|
# Set up system size - 3 options
|
||||||
# actually used by the rubytester, but is included to support the
|
|
||||||
# M5 memory size == Ruby memory size checks
|
|
||||||
#
|
#
|
||||||
system = System(cpu=tester, mem_ranges=[AddrRange(options.mem_size)])
|
if (options.system_size == "small"):
|
||||||
|
# 1 CU, 1 CPU, 1 SQC, 1 Scalar
|
||||||
|
options.wf_size = 1
|
||||||
|
options.wavefronts_per_cu = 1
|
||||||
|
options.num_cpus = 1
|
||||||
|
options.cu_per_sqc = 1
|
||||||
|
options.cu_per_scalar_cache = 1
|
||||||
|
options.num_compute_units = 1
|
||||||
|
elif (options.system_size == "medium"):
|
||||||
|
# 4 CUs, 4 CPUs, 1 SQCs, 1 Scalars
|
||||||
|
options.wf_size = 16
|
||||||
|
options.wavefronts_per_cu = 4
|
||||||
|
options.num_cpus = 4
|
||||||
|
options.cu_per_sqc = 4
|
||||||
|
options.cu_per_scalar_cache = 4
|
||||||
|
options.num_compute_units = 4
|
||||||
|
elif (options.system_size == "large"):
|
||||||
|
# 8 CUs, 4 CPUs, 1 SQCs, 1 Scalars
|
||||||
|
options.wf_size = 32
|
||||||
|
options.wavefronts_per_cu = 4
|
||||||
|
options.num_cpus = 4
|
||||||
|
options.cu_per_sqc = 4
|
||||||
|
options.cu_per_scalar_cache = 4
|
||||||
|
options.num_compute_units = 8
|
||||||
|
|
||||||
# Create a top-level voltage domain and clock domain
|
#
|
||||||
system.voltage_domain = VoltageDomain(voltage=options.sys_voltage)
|
# Set address range - 2 options
|
||||||
|
# level 0: small
|
||||||
|
# level 1: large
|
||||||
|
# Each location corresponds to a 4-byte piece of data
|
||||||
|
#
|
||||||
|
options.mem_size = '1024MB'
|
||||||
|
if (options.address_range == "small"):
|
||||||
|
num_atomic_locs = 10
|
||||||
|
num_regular_locs_per_atomic_loc = 10000
|
||||||
|
elif (options.address_range == "large"):
|
||||||
|
num_atomic_locs = 100
|
||||||
|
num_regular_locs_per_atomic_loc = 100000
|
||||||
|
|
||||||
system.clk_domain = SrcClockDomain(clock=options.sys_clock,
|
#
|
||||||
voltage_domain=system.voltage_domain)
|
# Set episode length (# of actions per episode) - 3 options
|
||||||
|
# 0: 10 actions
|
||||||
|
# 1: 100 actions
|
||||||
|
# 2: 500 actions
|
||||||
|
#
|
||||||
|
if (options.episode_length == "short"):
|
||||||
|
eps_length = 10
|
||||||
|
elif (options.episode_length == "medium"):
|
||||||
|
eps_length = 100
|
||||||
|
elif (options.episode_length == "long"):
|
||||||
|
eps_length = 500
|
||||||
|
|
||||||
|
#
|
||||||
|
# Set Ruby and tester deadlock thresholds. Ruby's deadlock detection is the
|
||||||
|
# primary check for deadlocks. The tester's deadlock threshold detection is
|
||||||
|
# a secondary check for deadlock. If there is a bug in RubyPort that causes
|
||||||
|
# a packet not to return to the tester properly, the tester will issue a
|
||||||
|
# deadlock panic. We set cache_deadlock_threshold < tester_deadlock_threshold
|
||||||
|
# to detect deadlock caused by Ruby protocol first before one caused by the
|
||||||
|
# coalescer. Both units are in Ticks
|
||||||
|
#
|
||||||
|
options.cache_deadlock_threshold = 1e8
|
||||||
|
tester_deadlock_threshold = 1e9
|
||||||
|
|
||||||
|
# For now we're testing only GPU protocol, so we force num_cpus to be 0
|
||||||
|
options.num_cpus = 0
|
||||||
|
|
||||||
|
# Number of CUs
|
||||||
|
n_CUs = options.num_compute_units
|
||||||
|
|
||||||
|
# Set test length, i.e., number of episodes per wavefront * #WFs.
|
||||||
|
# Test length can be 1x#WFs, 10x#WFs, 100x#WFs, ...
|
||||||
|
n_WFs = n_CUs * options.wavefronts_per_cu
|
||||||
|
max_episodes = options.test_length * n_WFs
|
||||||
|
|
||||||
|
# Number of SQC and Scalar caches
|
||||||
|
assert(n_CUs % options.cu_per_sqc == 0)
|
||||||
|
n_SQCs = n_CUs // options.cu_per_sqc
|
||||||
|
options.num_sqc = n_SQCs
|
||||||
|
|
||||||
|
assert(options.cu_per_scalar_cache != 0)
|
||||||
|
n_Scalars = n_CUs // options.cu_per_scalar_cache
|
||||||
|
options.num_scalar_cache = n_Scalars
|
||||||
|
|
||||||
|
#
|
||||||
|
# Create GPU Ruby random tester
|
||||||
|
#
|
||||||
|
tester = ProtocolTester(cus_per_sqc = options.cu_per_sqc,
|
||||||
|
cus_per_scalar = options.cu_per_scalar_cache,
|
||||||
|
wavefronts_per_cu = options.wavefronts_per_cu,
|
||||||
|
workitems_per_wavefront = options.wf_size,
|
||||||
|
num_atomic_locations = num_atomic_locs,
|
||||||
|
num_normal_locs_per_atomic = \
|
||||||
|
num_regular_locs_per_atomic_loc,
|
||||||
|
max_num_episodes = max_episodes,
|
||||||
|
episode_length = eps_length,
|
||||||
|
debug_tester = options.debug_tester,
|
||||||
|
random_seed = options.random_seed,
|
||||||
|
log_file = options.log_file)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Create a gem5 system. Note that the memory object isn't actually used by the
|
||||||
|
# tester, but is included to ensure the gem5 memory size == Ruby memory size
|
||||||
|
# checks. The system doesn't have real CPUs or CUs. It just has a tester that
|
||||||
|
# has physical ports to be connected to Ruby
|
||||||
|
#
|
||||||
|
system = System(cpu = tester,
|
||||||
|
mem_ranges = [AddrRange(options.mem_size)],
|
||||||
|
cache_line_size = options.cacheline_size,
|
||||||
|
mem_mode = 'timing')
|
||||||
|
|
||||||
|
system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
|
||||||
|
system.clk_domain = SrcClockDomain(clock = options.sys_clock,
|
||||||
|
voltage_domain = system.voltage_domain)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Command processor is not needed for the tester since we don't run real
|
||||||
|
# kernels. Setting it to zero disables the VIPER protocol from creating
|
||||||
|
# a command processor and its caches.
|
||||||
|
#
|
||||||
|
options.num_cp = 0
|
||||||
|
|
||||||
|
#
|
||||||
|
# Create the Ruby system
|
||||||
|
#
|
||||||
Ruby.create_system(options, False, system)
|
Ruby.create_system(options, False, system)
|
||||||
|
|
||||||
# Create a seperate clock domain for Ruby
|
|
||||||
system.ruby.clk_domain = SrcClockDomain(clock=options.ruby_clock,
|
|
||||||
voltage_domain=system.voltage_domain)
|
|
||||||
|
|
||||||
tester.num_cpus = len(system.ruby._cpu_ports)
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# The tester is most effective when randomization is turned on and
|
# The tester is most effective when randomization is turned on and
|
||||||
# artifical delay is randomly inserted on messages
|
# artifical delay is randomly inserted on messages
|
||||||
#
|
#
|
||||||
system.ruby.randomization = True
|
system.ruby.randomization = True
|
||||||
|
|
||||||
for ruby_port in system.ruby._cpu_ports:
|
# Assert that we got the right number of Ruby ports
|
||||||
|
assert(len(system.ruby._cpu_ports) == n_CUs + n_SQCs + n_Scalars)
|
||||||
|
|
||||||
#
|
#
|
||||||
# Tie the ruby tester ports to the ruby cpu read and write ports
|
# Attach Ruby ports to the tester in the order:
|
||||||
#
|
# cpu_sequencers,
|
||||||
if ruby_port.support_data_reqs and ruby_port.support_inst_reqs:
|
# vector_coalescers,
|
||||||
tester.cpuInstDataPort = ruby_port.slave
|
# sqc_sequencers,
|
||||||
elif ruby_port.support_data_reqs:
|
# scalar_sequencers
|
||||||
tester.cpuDataPort = ruby_port.slave
|
#
|
||||||
elif ruby_port.support_inst_reqs:
|
# Note that this requires the protocol to create sequencers in this order
|
||||||
tester.cpuInstPort = ruby_port.slave
|
#
|
||||||
|
print("Attaching ruby ports to the tester")
|
||||||
# Do not automatically retry stalled Ruby requests
|
for i, ruby_port in enumerate(system.ruby._cpu_ports):
|
||||||
ruby_port.no_retry_on_stall = True
|
ruby_port.no_retry_on_stall = True
|
||||||
|
|
||||||
#
|
|
||||||
# Tell each sequencer this is the ruby tester so that it
|
|
||||||
# copies the subblock back to the checker
|
|
||||||
#
|
|
||||||
ruby_port.using_ruby_tester = True
|
ruby_port.using_ruby_tester = True
|
||||||
|
|
||||||
# -----------------------
|
if i < n_CUs:
|
||||||
# run simulation
|
tester.cu_vector_ports = ruby_port.in_ports
|
||||||
# -----------------------
|
tester.cu_token_ports = ruby_port.gmTokenPort
|
||||||
|
tester.max_cu_tokens = 4*n_WFs
|
||||||
|
elif i < (n_CUs + n_SQCs):
|
||||||
|
tester.cu_sqc_ports = ruby_port.in_ports
|
||||||
|
else:
|
||||||
|
tester.cu_scalar_ports = ruby_port.in_ports
|
||||||
|
|
||||||
root = Root( full_system = False, system = system )
|
i += 1
|
||||||
root.system.mem_mode = 'timing'
|
|
||||||
|
#
|
||||||
|
# No CPU threads are needed for GPU tester
|
||||||
|
#
|
||||||
|
tester.cpu_threads = []
|
||||||
|
|
||||||
|
#
|
||||||
|
# Create GPU wavefronts
|
||||||
|
#
|
||||||
|
thread_clock = SrcClockDomain(clock = '1GHz',
|
||||||
|
voltage_domain = system.voltage_domain)
|
||||||
|
wavefronts = []
|
||||||
|
g_thread_idx = 0
|
||||||
|
print("Creating %i WFs attached to %i CUs" % \
|
||||||
|
(n_CUs * tester.wavefronts_per_cu, n_CUs))
|
||||||
|
for cu_idx in range(n_CUs):
|
||||||
|
for wf_idx in range(tester.wavefronts_per_cu):
|
||||||
|
wavefronts.append(GpuWavefront(thread_id = g_thread_idx,
|
||||||
|
cu_id = cu_idx,
|
||||||
|
num_lanes = options.wf_size,
|
||||||
|
clk_domain = thread_clock,
|
||||||
|
deadlock_threshold = \
|
||||||
|
tester_deadlock_threshold))
|
||||||
|
g_thread_idx += 1
|
||||||
|
tester.wavefronts = wavefronts
|
||||||
|
|
||||||
|
#
|
||||||
|
# Run simulation
|
||||||
|
#
|
||||||
|
root = Root(full_system = False, system = system)
|
||||||
|
|
||||||
# Not much point in this being higher than the L1 latency
|
# Not much point in this being higher than the L1 latency
|
||||||
m5.ticks.setGlobalFrequency('1ns')
|
m5.ticks.setGlobalFrequency('1ns')
|
||||||
|
|
||||||
# instantiate configuration
|
# Instantiate configuration
|
||||||
m5.instantiate()
|
m5.instantiate()
|
||||||
|
|
||||||
# simulate until program terminates
|
# Simulate until tester completes
|
||||||
exit_event = m5.simulate(options.abs_max_tick)
|
exit_event = m5.simulate()
|
||||||
|
|
||||||
print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
|
print('Exiting tick: ', m5.curTick())
|
||||||
|
print('Exiting because ', exit_event.getCause())
|
||||||
|
|||||||
39
src/cpu/testers/gpu_ruby_test/CpuThread.py
Normal file
39
src/cpu/testers/gpu_ruby_test/CpuThread.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# For use for simulation and test purposes only
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are met:
|
||||||
|
#
|
||||||
|
# 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer.
|
||||||
|
#
|
||||||
|
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer in the documentation
|
||||||
|
# and/or other materials provided with the distribution.
|
||||||
|
#
|
||||||
|
# 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived from this
|
||||||
|
# software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
from m5.params import *
|
||||||
|
from m5.proxy import *
|
||||||
|
|
||||||
|
from m5.objects.GpuThread import GpuThread
|
||||||
|
|
||||||
|
class CpuThread(GpuThread):
|
||||||
|
type = 'CpuThread'
|
||||||
|
cxx_header = "cpu/testers/gpu_ruby_test/cpu_thread.hh"
|
||||||
42
src/cpu/testers/gpu_ruby_test/GpuThread.py
Normal file
42
src/cpu/testers/gpu_ruby_test/GpuThread.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# For use for simulation and test purposes only
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are met:
|
||||||
|
#
|
||||||
|
# 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer.
|
||||||
|
#
|
||||||
|
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer in the documentation
|
||||||
|
# and/or other materials provided with the distribution.
|
||||||
|
#
|
||||||
|
# 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived from this
|
||||||
|
# software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
from m5.objects.ClockedObject import ClockedObject
|
||||||
|
from m5.params import *
|
||||||
|
from m5.proxy import *
|
||||||
|
|
||||||
|
class GpuThread(ClockedObject):
|
||||||
|
type = 'GpuThread'
|
||||||
|
abstract = True
|
||||||
|
cxx_header = "cpu/testers/gpu_ruby_test/gpu_thread.hh"
|
||||||
|
thread_id = Param.Int("Unique GpuThread ID")
|
||||||
|
num_lanes = Param.Int("Number of lanes this thread has")
|
||||||
|
deadlock_threshold = Param.Cycles(1000000000, "Deadlock threshold")
|
||||||
40
src/cpu/testers/gpu_ruby_test/GpuWavefront.py
Normal file
40
src/cpu/testers/gpu_ruby_test/GpuWavefront.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# For use for simulation and test purposes only
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are met:
|
||||||
|
#
|
||||||
|
# 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer.
|
||||||
|
#
|
||||||
|
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer in the documentation
|
||||||
|
# and/or other materials provided with the distribution.
|
||||||
|
#
|
||||||
|
# 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived from this
|
||||||
|
# software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
from m5.params import *
|
||||||
|
from m5.proxy import *
|
||||||
|
|
||||||
|
from m5.objects.GpuThread import GpuThread
|
||||||
|
|
||||||
|
class GpuWavefront(GpuThread):
|
||||||
|
type = 'GpuWavefront'
|
||||||
|
cxx_header = "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
|
||||||
|
cu_id = Param.Int("Compute Unit ID")
|
||||||
64
src/cpu/testers/gpu_ruby_test/ProtocolTester.py
Normal file
64
src/cpu/testers/gpu_ruby_test/ProtocolTester.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# For use for simulation and test purposes only
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are met:
|
||||||
|
#
|
||||||
|
# 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer.
|
||||||
|
#
|
||||||
|
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer in the documentation
|
||||||
|
# and/or other materials provided with the distribution.
|
||||||
|
#
|
||||||
|
# 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived from this
|
||||||
|
# software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
from m5.objects.ClockedObject import ClockedObject
|
||||||
|
from m5.params import *
|
||||||
|
from m5.proxy import *
|
||||||
|
|
||||||
|
class ProtocolTester(ClockedObject):
|
||||||
|
type = 'ProtocolTester'
|
||||||
|
cxx_header = "cpu/testers/gpu_ruby_test/protocol_tester.hh"
|
||||||
|
|
||||||
|
cpu_ports = VectorRequestPort("Ports for CPUs")
|
||||||
|
cu_vector_ports = VectorRequestPort("Vector ports for GPUs")
|
||||||
|
cu_sqc_ports = VectorRequestPort("SQC ports for GPUs")
|
||||||
|
cu_scalar_ports = VectorRequestPort("Scalar ports for GPUs")
|
||||||
|
|
||||||
|
cus_per_sqc = Param.Int(4, "Number of CUs per SQC")
|
||||||
|
cus_per_scalar = Param.Int(4, "Number of CUs per scalar cache")
|
||||||
|
|
||||||
|
wavefronts_per_cu = Param.Int(1, "Number of wavefronts per CU")
|
||||||
|
workitems_per_wavefront = Param.Int(64, "Number of workitems per wf")
|
||||||
|
|
||||||
|
cpu_threads = VectorParam.CpuThread("All cpus")
|
||||||
|
wavefronts = VectorParam.GpuWavefront("All wavefronts")
|
||||||
|
|
||||||
|
num_atomic_locations = Param.Int(2, "Number of atomic locations")
|
||||||
|
num_normal_locs_per_atomic = Param.Int(1000, \
|
||||||
|
"Number of normal locations per atomic")
|
||||||
|
|
||||||
|
episode_length = Param.Int(10, "Number of actions per episode")
|
||||||
|
max_num_episodes = Param.Int(20, "Maximum number of episodes")
|
||||||
|
debug_tester = Param.Bool(False, "Are we debugging the tester?")
|
||||||
|
random_seed = Param.Int(0, "Random seed number. Default value (0) means \
|
||||||
|
using runtime-specific value.")
|
||||||
|
log_file = Param.String("Log file's name")
|
||||||
|
system = Param.System(Parent.any, "System we belong to")
|
||||||
129
src/cpu/testers/gpu_ruby_test/README
Normal file
129
src/cpu/testers/gpu_ruby_test/README
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
This directory contains a tester for gem5 GPU protocols. Unlike the Ruby random
|
||||||
|
teter, this tester does not rely on sequential consistency. Instead, it
|
||||||
|
assumes tested protocols supports release consistency.
|
||||||
|
|
||||||
|
----- Getting Started -----
|
||||||
|
|
||||||
|
To start using the tester quickly, you can use the following example command
|
||||||
|
line to get running immediately:
|
||||||
|
|
||||||
|
build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py \
|
||||||
|
--test-length=1000 --system-size=medium --cache-size=small
|
||||||
|
|
||||||
|
An overview of the main command line options is as follows. For all options
|
||||||
|
use `build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py --help`
|
||||||
|
or see the configuration file.
|
||||||
|
|
||||||
|
* --cache-size (small, large): Use smaller sizes for testing evict, etc.
|
||||||
|
* --system-size (small, medium, large): Effectively the number of threads in
|
||||||
|
the GPU model. Large size will have more contention. Larger
|
||||||
|
sizes are useful for checking contention.
|
||||||
|
* --episode-length (short, medium, long): Number of loads and stores in an
|
||||||
|
episode. Episodes will also have atomics mixed in. See below
|
||||||
|
for a definition of episode.
|
||||||
|
* --test-length (int): Number of episodes to execute. This will determine the
|
||||||
|
amount of time the tester runs for. Longer time will stress
|
||||||
|
the protocol harder.
|
||||||
|
|
||||||
|
The remainder of this file describes the theory behind the tester design and
|
||||||
|
a link to a more detailed research paper is provided at the end.
|
||||||
|
|
||||||
|
----- Theory Overview -----
|
||||||
|
|
||||||
|
The GPU Ruby tester creates a system consisting of both CPU threads and GPU
|
||||||
|
wavefronts. CPU threads are scalar, so there is one lane per CPU thread. GPU
|
||||||
|
wavefront may have multiple lanes. The number of lanes is initialized when
|
||||||
|
a thread/wavefront is created.
|
||||||
|
|
||||||
|
Each thread/wavefront executes a number of episodes. Each episode is a series
|
||||||
|
of memory actions (i.e., atomic, load, store, acquire and release). In a
|
||||||
|
wavefront, all lanes execute the same sequence of actions, but they may target
|
||||||
|
different addresses. One can think of an episode as a critical section which
|
||||||
|
is bounded by a lock acquire in the beginning and a lock release at the end. An
|
||||||
|
episode consists of actions in the following order:
|
||||||
|
|
||||||
|
1 - Atomic action
|
||||||
|
2 - Acquire action
|
||||||
|
3 - A number of load and store actions
|
||||||
|
4 - Release action
|
||||||
|
5 - Atomic action that targets the same address as (1) does
|
||||||
|
|
||||||
|
There are two separate set of addresses: atomic and non-atomic. Atomic actions
|
||||||
|
target only atomic addresses. Load and store actions target only non-atomic
|
||||||
|
addresses. Memory addresses are all 4-byte aligned in the tester.
|
||||||
|
|
||||||
|
To test false sharing cases in which both atomic and non-atomic addresses are
|
||||||
|
placed in the same cache line, we abstract out the concept of memory addresses
|
||||||
|
from the tester's perspective by introducing the concept of location. Locations
|
||||||
|
are numbered from 0 to N-1 (if there are N addresses). The first X locations
|
||||||
|
[0..X-1] are atomic locations, and the rest are non-atomic locations.
|
||||||
|
The 1-1 mapping between locations and addresses are randomly created when the
|
||||||
|
tester is initialized.
|
||||||
|
|
||||||
|
Per load and store action, its target location is selected so that there is no
|
||||||
|
data race in the generated stream of memory requests at any time during the
|
||||||
|
test. Since in Data-Race-Free model, the memory system's behavior is undefined
|
||||||
|
in data race cases, we exclude data race scenarios from our protocol test.
|
||||||
|
|
||||||
|
Once location per load/store action is determined, each thread/wavefront either
|
||||||
|
loads current value at the location or stores an incremental value to that
|
||||||
|
location. The tester maintains a table tracking all last writers and their
|
||||||
|
written values, so we know what value should be returned from a load and what
|
||||||
|
value should be written next at a particular location. Value returned from a
|
||||||
|
load must match with the value written by the last writer.
|
||||||
|
|
||||||
|
----- Directory Structure -----
|
||||||
|
|
||||||
|
ProtocolTester.hh/cc -- This is the main tester class that orchestrates the
|
||||||
|
entire test.
|
||||||
|
AddressManager.hh/cc -- This manages address space, randomly maps address to
|
||||||
|
location, generates locations for all episodes,
|
||||||
|
maintains per-location last writer and validates
|
||||||
|
values returned from load actions.
|
||||||
|
GpuThread.hh/cc -- This is abstract class for CPU threads and GPU
|
||||||
|
wavefronts. It generates and executes a series of
|
||||||
|
episodes.
|
||||||
|
CpuThread.hh/cc -- Thread class for CPU threads. Not fully implemented yet
|
||||||
|
GpuWavefront.hh/cc -- GpuThread class for GPU wavefronts.
|
||||||
|
Episode.hh/cc -- Class to encapsulate an episode, notably including
|
||||||
|
episode load/store structure and ordering.
|
||||||
|
|
||||||
|
For more detail, please see the following paper:
|
||||||
|
|
||||||
|
T. Ta, X. Zhang, A. Gutierrez and B. M. Beckmann, "Autonomous Data-Race-Free
|
||||||
|
GPU Testing," 2019 IEEE International Symposium on Workload Characterization
|
||||||
|
(IISWC), Orlando, FL, USA, 2019, pp. 81-92, doi:
|
||||||
|
10.1109/IISWC47752.2019.9042019.
|
||||||
54
src/cpu/testers/gpu_ruby_test/SConscript
Normal file
54
src/cpu/testers/gpu_ruby_test/SConscript
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# For use for simulation and test purposes only
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are met:
|
||||||
|
#
|
||||||
|
# 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer.
|
||||||
|
#
|
||||||
|
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer in the documentation
|
||||||
|
# and/or other materials provided with the distribution.
|
||||||
|
#
|
||||||
|
# 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived from this
|
||||||
|
# software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
#
|
||||||
|
|
||||||
|
Import('*')
|
||||||
|
|
||||||
|
if not env['BUILD_GPU']:
|
||||||
|
Return()
|
||||||
|
|
||||||
|
if env['PROTOCOL'] == 'None':
|
||||||
|
Return()
|
||||||
|
|
||||||
|
SimObject('ProtocolTester.py')
|
||||||
|
SimObject('GpuThread.py')
|
||||||
|
SimObject('CpuThread.py')
|
||||||
|
SimObject('GpuWavefront.py')
|
||||||
|
|
||||||
|
Source('address_manager.cc')
|
||||||
|
Source('episode.cc')
|
||||||
|
Source('protocol_tester.cc')
|
||||||
|
Source('gpu_thread.cc')
|
||||||
|
Source('cpu_thread.cc')
|
||||||
|
Source('gpu_wavefront.cc')
|
||||||
|
|
||||||
|
DebugFlag('ProtocolTest')
|
||||||
431
src/cpu/testers/gpu_ruby_test/address_manager.cc
Normal file
431
src/cpu/testers/gpu_ruby_test/address_manager.cc
Normal file
@@ -0,0 +1,431 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "base/intmath.hh"
|
||||||
|
#include "base/logging.hh"
|
||||||
|
#include "base/random.hh"
|
||||||
|
#include "base/trace.hh"
|
||||||
|
|
||||||
|
const int AddressManager::INVALID_VALUE = -1;
|
||||||
|
const int AddressManager::INVALID_LOCATION = -1;
|
||||||
|
|
||||||
|
AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic)
|
||||||
|
: numAtomicLocs(n_atomic_locs),
|
||||||
|
numLocsPerAtomic(n_normal_locs_per_atomic)
|
||||||
|
{
|
||||||
|
assert(numAtomicLocs > 0 && numLocsPerAtomic > 0);
|
||||||
|
numNormalLocs = numAtomicLocs * numLocsPerAtomic;
|
||||||
|
|
||||||
|
// generate random address map
|
||||||
|
randAddressMap.resize(numAtomicLocs + numNormalLocs);
|
||||||
|
for (Location i = 0; i < numAtomicLocs + numNormalLocs; ++i) {
|
||||||
|
// all addresses are sizeof(Value) (i.e., 4-byte) aligned
|
||||||
|
randAddressMap[i] = (Addr)((i + 128) << floorLog2(sizeof(Value)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// randomly shuffle randAddressMap
|
||||||
|
std::random_shuffle(randAddressMap.begin(), randAddressMap.end());
|
||||||
|
|
||||||
|
// initialize atomic locations
|
||||||
|
// first and last normal location per atomic location
|
||||||
|
Location first, last;
|
||||||
|
for (Location atomic_loc = 0; atomic_loc < numAtomicLocs; ++atomic_loc) {
|
||||||
|
first = numAtomicLocs + numLocsPerAtomic * atomic_loc;
|
||||||
|
last = first + numLocsPerAtomic - 1;
|
||||||
|
atomicStructs.push_back(new AtomicStruct(atomic_loc, first, last));
|
||||||
|
}
|
||||||
|
|
||||||
|
// initialize log table
|
||||||
|
for (Location loc = 0; loc < numAtomicLocs + numNormalLocs; ++loc) {
|
||||||
|
logTable.push_back(new LastWriter());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
AddressManager::~AddressManager()
|
||||||
|
{
|
||||||
|
for (AtomicStruct* atomic_struct : atomicStructs)
|
||||||
|
delete atomic_struct;
|
||||||
|
for (LastWriter* lw : logTable)
|
||||||
|
delete lw;
|
||||||
|
}
|
||||||
|
|
||||||
|
Addr
|
||||||
|
AddressManager::getAddress(Location loc)
|
||||||
|
{
|
||||||
|
assert(loc < numAtomicLocs + numNormalLocs && loc >= 0);
|
||||||
|
return randAddressMap[loc];
|
||||||
|
}
|
||||||
|
|
||||||
|
AddressManager::Location
|
||||||
|
AddressManager::getAtomicLoc()
|
||||||
|
{
|
||||||
|
Location ret_atomic_loc = random() % numAtomicLocs;
|
||||||
|
atomicStructs[ret_atomic_loc]->startLocSelection();
|
||||||
|
return ret_atomic_loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
AddressManager::Location
|
||||||
|
AddressManager::getLoadLoc(Location atomic_loc)
|
||||||
|
{
|
||||||
|
assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
|
||||||
|
return atomicStructs[atomic_loc]->getLoadLoc();
|
||||||
|
}
|
||||||
|
|
||||||
|
AddressManager::Location
|
||||||
|
AddressManager::getStoreLoc(Location atomic_loc)
|
||||||
|
{
|
||||||
|
assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
|
||||||
|
return atomicStructs[atomic_loc]->getStoreLoc();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
AddressManager::finishLocSelection(Location atomic_loc)
|
||||||
|
{
|
||||||
|
assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
|
||||||
|
atomicStructs[atomic_loc]->endLocSelection();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
AddressManager::releaseLocation(Location atomic_loc, Location loc)
|
||||||
|
{
|
||||||
|
assert(atomic_loc >= 0 && atomic_loc < numAtomicLocs);
|
||||||
|
atomicStructs[atomic_loc]->releaseLoc(loc);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
AddressManager::printLastWriter(Location loc) const
|
||||||
|
{
|
||||||
|
return logTable[loc]->print();
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------- AtomicStruct --------------------------
|
||||||
|
AddressManager::AtomicStruct::AtomicStruct(Location atomic_loc,
|
||||||
|
Location loc_begin,
|
||||||
|
Location loc_end)
|
||||||
|
{
|
||||||
|
// the location range must have at least 1 location
|
||||||
|
assert(loc_begin <= loc_end);
|
||||||
|
|
||||||
|
atomicLoc = atomic_loc;
|
||||||
|
arraySize = loc_end - loc_begin + 1;
|
||||||
|
locationBase = loc_begin;
|
||||||
|
|
||||||
|
// allocate an array of arrray_size
|
||||||
|
locArray = new Location[arraySize];
|
||||||
|
|
||||||
|
// initialize locArray & locProps
|
||||||
|
Location loc;
|
||||||
|
for (int offset = 0; offset < arraySize; ++offset) {
|
||||||
|
loc = locationBase + offset;
|
||||||
|
locArray[offset] = loc;
|
||||||
|
locProps.push_back(LocProperty(offset, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
// region (1) and (3) are initially empty
|
||||||
|
firstMark = 0;
|
||||||
|
secondMark = arraySize;
|
||||||
|
// no request made at this location so far
|
||||||
|
requestCount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
AddressManager::AtomicStruct::~AtomicStruct()
|
||||||
|
{
|
||||||
|
delete[] locArray;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
AddressManager::AtomicStruct::startLocSelection()
|
||||||
|
{
|
||||||
|
assert(firstMark >= 0);
|
||||||
|
assert(firstMark <= secondMark);
|
||||||
|
assert(secondMark <= arraySize);
|
||||||
|
// make sure loadStoreMap has been cleared
|
||||||
|
assert(loadStoreMap.empty());
|
||||||
|
|
||||||
|
// this atomic location is picked for Atomic_ACQ
|
||||||
|
// and Atomic_REL in an episode
|
||||||
|
requestCount += 2;
|
||||||
|
// add two expected values in expectedValues set
|
||||||
|
expectedValues.insert(requestCount - 1);
|
||||||
|
expectedValues.insert(requestCount - 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
AddressManager::Location
|
||||||
|
AddressManager::AtomicStruct::getLoadLoc()
|
||||||
|
{
|
||||||
|
assert(firstMark >= 0);
|
||||||
|
assert(firstMark <= secondMark);
|
||||||
|
assert(secondMark <= arraySize);
|
||||||
|
|
||||||
|
if (firstMark == arraySize) {
|
||||||
|
// no location can be picked for a LD now, so return an empty location
|
||||||
|
return INVALID_LOCATION;
|
||||||
|
} else {
|
||||||
|
// we can pick any location btw
|
||||||
|
// locArray [firstMark : arraySize-1]
|
||||||
|
int range_size = arraySize - firstMark;
|
||||||
|
Location ret_loc = locArray[firstMark + random() % range_size];
|
||||||
|
|
||||||
|
// update loadStoreMap
|
||||||
|
LdStMap::iterator it = loadStoreMap.find(ret_loc);
|
||||||
|
|
||||||
|
if (it == loadStoreMap.end()) {
|
||||||
|
// insert a new entry to the map b/c the entry is not there yet
|
||||||
|
// to mark this location has been picked for a LD
|
||||||
|
loadStoreMap.insert(std::pair<Location, LdStBits>
|
||||||
|
(ret_loc, LdStBits(true,false)));
|
||||||
|
} else {
|
||||||
|
// otherwise, just update the LD bit
|
||||||
|
(it->second).first = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret_loc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
AddressManager::Location
|
||||||
|
AddressManager::AtomicStruct::getStoreLoc()
|
||||||
|
{
|
||||||
|
assert(firstMark >= 0);
|
||||||
|
assert(firstMark <= secondMark);
|
||||||
|
assert(secondMark <= arraySize);
|
||||||
|
|
||||||
|
if (firstMark == secondMark) {
|
||||||
|
// no location can be picked for a ST now, return an invalid location
|
||||||
|
return INVALID_LOCATION;
|
||||||
|
} else {
|
||||||
|
// we can pick any location btw [firstMark : secondMark-1]
|
||||||
|
int range_size = secondMark - firstMark;
|
||||||
|
Location ret_loc = locArray[firstMark + random() % range_size];
|
||||||
|
|
||||||
|
// update loadStoreMap
|
||||||
|
LdStMap::iterator it = loadStoreMap.find(ret_loc);
|
||||||
|
|
||||||
|
if (it == loadStoreMap.end()) {
|
||||||
|
// insert a new entry to the map b/c the entry is not there yet
|
||||||
|
// to mark this location has been picked for a ST
|
||||||
|
loadStoreMap.insert(std::pair<Location, LdStBits>
|
||||||
|
(ret_loc, LdStBits(false,true)));
|
||||||
|
} else {
|
||||||
|
// otherwise, just update the ST bit
|
||||||
|
(it->second).second = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret_loc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// for each entry in loadStoreMap,
|
||||||
|
// if <LD_bit, ST_bit> == <1,0>
|
||||||
|
// - if the location is in (2), then move it to (3)
|
||||||
|
// - if the location is in (3), no move
|
||||||
|
// - otherwise, throw an error
|
||||||
|
// if <LD_bit, ST_bit> == <0,1> or <1,1>
|
||||||
|
// - move it from (2) to (1)
|
||||||
|
void
|
||||||
|
AddressManager::AtomicStruct::endLocSelection()
|
||||||
|
{
|
||||||
|
assert(firstMark >= 0);
|
||||||
|
assert(firstMark <= secondMark);
|
||||||
|
assert(secondMark <= arraySize);
|
||||||
|
|
||||||
|
for (auto& it : loadStoreMap) {
|
||||||
|
Location loc = it.first;
|
||||||
|
LdStBits p = it.second;
|
||||||
|
|
||||||
|
assert(loc >= locationBase && loc < locationBase + arraySize);
|
||||||
|
LocProperty& loc_prop = locProps[loc - locationBase];
|
||||||
|
|
||||||
|
if (p.first && !p.second) {
|
||||||
|
// this location has been picked for LD(s) but not ST
|
||||||
|
// it must be in either region (2) or (3)
|
||||||
|
assert(inSecondRegion(loc_prop.first) ||
|
||||||
|
inThirdRegion(loc_prop.first));
|
||||||
|
|
||||||
|
if (inSecondRegion(loc_prop.first)) {
|
||||||
|
// there is no owner of this location yet
|
||||||
|
assert(loc_prop.second == 0);
|
||||||
|
|
||||||
|
// pick the last location in (2) to swap
|
||||||
|
Location swapped_loc = locArray[secondMark - 1];
|
||||||
|
LocProperty& swapped_loc_prop =
|
||||||
|
locProps[swapped_loc - locationBase];
|
||||||
|
|
||||||
|
// swap loc and swapped_loc
|
||||||
|
swap(loc_prop, swapped_loc_prop);
|
||||||
|
|
||||||
|
// then, expand (3)
|
||||||
|
secondMark--;
|
||||||
|
}
|
||||||
|
|
||||||
|
// increment the location's number of owners
|
||||||
|
loc_prop.second++;
|
||||||
|
} else if (p.second) {
|
||||||
|
// this location has been picked for ST(s) and/or LD(s)
|
||||||
|
// it must be in region (2)
|
||||||
|
assert(inSecondRegion(loc_prop.first) && loc_prop.second == 0);
|
||||||
|
|
||||||
|
// pick the first location in (2) to swap
|
||||||
|
Location swapped_loc = locArray[firstMark];
|
||||||
|
LocProperty& swapped_loc_prop =
|
||||||
|
locProps[swapped_loc - locationBase];
|
||||||
|
|
||||||
|
// swap loc and swapped_loc
|
||||||
|
swap(loc_prop, swapped_loc_prop);
|
||||||
|
|
||||||
|
// then, expand (1)
|
||||||
|
firstMark++;
|
||||||
|
|
||||||
|
// increment the location's number of owners
|
||||||
|
loc_prop.second++;
|
||||||
|
} else {
|
||||||
|
panic("Location in loadStoreMap but wasn't picked in any"
|
||||||
|
" action\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// clear the ld_st_map
|
||||||
|
loadStoreMap.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
AddressManager::AtomicStruct::releaseLoc(Location loc)
|
||||||
|
{
|
||||||
|
assert(loc >= locationBase && loc < locationBase + arraySize);
|
||||||
|
|
||||||
|
LocProperty& loc_prop = locProps[loc - locationBase];
|
||||||
|
|
||||||
|
if (inFirstRegion(loc_prop.first)) {
|
||||||
|
// this location must have exactly 1 owner
|
||||||
|
assert(loc_prop.second == 1);
|
||||||
|
|
||||||
|
// pick the last location in region 1 to swap
|
||||||
|
Location swapped_loc = locArray[firstMark - 1];
|
||||||
|
LocProperty& swapped_loc_prop = locProps[swapped_loc - locationBase];
|
||||||
|
|
||||||
|
// swap loc and swapped_loc
|
||||||
|
swap(loc_prop, swapped_loc_prop);
|
||||||
|
|
||||||
|
// then shrink (1)
|
||||||
|
firstMark--;
|
||||||
|
|
||||||
|
// reset the location's number of owners
|
||||||
|
loc_prop.second = 0;
|
||||||
|
} else if (inThirdRegion(loc_prop.first)) {
|
||||||
|
// this location must have at least 1 owner
|
||||||
|
assert(loc_prop.second >= 1);
|
||||||
|
|
||||||
|
if (loc_prop.second == 1) {
|
||||||
|
// pick the first location in region 3 to swap
|
||||||
|
Location swapped_loc = locArray[secondMark];
|
||||||
|
LocProperty& swapped_loc_prop =
|
||||||
|
locProps[swapped_loc - locationBase];
|
||||||
|
|
||||||
|
// swap loc and swapped_loc
|
||||||
|
swap(loc_prop, swapped_loc_prop);
|
||||||
|
|
||||||
|
// then shrink (3)
|
||||||
|
secondMark++;
|
||||||
|
}
|
||||||
|
// decrement the loc's number of owners
|
||||||
|
loc_prop.second--;
|
||||||
|
} else {
|
||||||
|
// some one else must already reset this counter
|
||||||
|
assert(inSecondRegion(loc_prop.first) && loc_prop.second == 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
AddressManager::AtomicStruct::isExpectedValue(Value val)
|
||||||
|
{
|
||||||
|
ExpectedValueSet::iterator it = expectedValues.find(val);
|
||||||
|
|
||||||
|
if (it == expectedValues.end()) {
|
||||||
|
std::stringstream exp_val_ss;
|
||||||
|
for (auto& val : expectedValues) {
|
||||||
|
exp_val_ss << " " << val;
|
||||||
|
}
|
||||||
|
|
||||||
|
warn("Expected return values are:\n\t%s\n", exp_val_ss.str());
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// erase this value b/c it's done
|
||||||
|
expectedValues.erase(it);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
AddressManager::AtomicStruct::swap(LocProperty& prop_1, LocProperty& prop_2)
|
||||||
|
{
|
||||||
|
int new_idx_1 = prop_2.first;
|
||||||
|
int new_idx_2 = prop_1.first;
|
||||||
|
|
||||||
|
// swap the two locations in locArray
|
||||||
|
Location tmp = locArray[prop_1.first];
|
||||||
|
locArray[prop_1.first] = locArray[prop_2.first];
|
||||||
|
locArray[prop_2.first] = tmp;
|
||||||
|
|
||||||
|
// update their new indices
|
||||||
|
prop_1.first = new_idx_1;
|
||||||
|
prop_2.first = new_idx_2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------ log table ---------------------
|
||||||
|
void
|
||||||
|
AddressManager::updateLogTable(Location loc, int thread_id, int episode_id,
|
||||||
|
Value new_value, Tick cur_tick, int cu_id)
|
||||||
|
{
|
||||||
|
assert(loc >= 0 && loc < numAtomicLocs + numNormalLocs);
|
||||||
|
logTable[loc]->update(thread_id, cu_id, episode_id, new_value, cur_tick);
|
||||||
|
}
|
||||||
|
|
||||||
|
AddressManager::Value
|
||||||
|
AddressManager::getLoggedValue(Location loc) const
|
||||||
|
{
|
||||||
|
assert(loc >= 0 && loc < numAtomicLocs + numNormalLocs);
|
||||||
|
return logTable[loc]->getLastStoredValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
AddressManager::validateAtomicResp(Location loc, Value ret_val)
|
||||||
|
{
|
||||||
|
assert(loc >= 0 && loc < numAtomicLocs);
|
||||||
|
return atomicStructs[loc]->isExpectedValue(ret_val);
|
||||||
|
}
|
||||||
274
src/cpu/testers/gpu_ruby_test/address_manager.hh
Normal file
274
src/cpu/testers/gpu_ruby_test/address_manager.hh
Normal file
@@ -0,0 +1,274 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_
|
||||||
|
#define CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "base/types.hh"
|
||||||
|
#include "sim/eventq.hh"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* --- AddressManager has 3 main tasks ---
|
||||||
|
* (1) generate DRF request sequences
|
||||||
|
* (2) maintain internal log table
|
||||||
|
* (3) validate return values against ones in the log table
|
||||||
|
*
|
||||||
|
* A location is an abstract index of a unique real address.
|
||||||
|
* It's used internally within the tester only.
|
||||||
|
* randAddressMap has the mapping between a location and its real address.
|
||||||
|
*
|
||||||
|
* A value is an integer that a location in real memory can store.
|
||||||
|
* for now, we assume a value is 4-byte
|
||||||
|
*
|
||||||
|
* The location range (randAddressMap) has two distinct parts:
|
||||||
|
* Atomic locations: in the 1st part of randAddressMap &
|
||||||
|
* Non-atomic locations (or just locations): in the 2nd part
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* --- DRF request sequence generation ---
|
||||||
|
* Each lane of an episode starts selecting its location by calling:
|
||||||
|
* (1) getAtomicLoc
|
||||||
|
* (2) getLoadLoc/getStoreLoc
|
||||||
|
* (3) finishLocSelection
|
||||||
|
*
|
||||||
|
* Each lane of an episode completes its executing by calling:
|
||||||
|
* releaseLocation for all locations it selected
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* --- Internal structures ---
|
||||||
|
* There are multiple atomic structures, each of which corresponds
|
||||||
|
* to an atomic location.
|
||||||
|
*
|
||||||
|
* Each atomic structure manages a distinct range of locations in locArray
|
||||||
|
* This array is partitioned into 3 parts that are used to select locations
|
||||||
|
* for LDs and STs. Here is the location selecting rule:
|
||||||
|
* | (1) | (2) | (3) |
|
||||||
|
* - all locations in (1) cannot be picked for any LD and ST action
|
||||||
|
* - all locations in (2) can be picked for either LD or ST action
|
||||||
|
* - all locations in (3) can be picked for LD action only
|
||||||
|
*
|
||||||
|
* We maintain the 3 parts by 2 indices firstMark and secondMark.
|
||||||
|
* As locations are moved between partitions, both indices are updated
|
||||||
|
* accordingly.
|
||||||
|
* [0 .. firstMark-1] part (1)
|
||||||
|
* [firstMark .. secondMark-1] part (2)
|
||||||
|
* [secondMark .. arraySize-1] part (3)
|
||||||
|
*
|
||||||
|
* Each location has its context/property. locProps maintains
|
||||||
|
* contexts/properties of all locations. Context/property includes
|
||||||
|
* - current index of a location in locArray
|
||||||
|
* - the number of owners who are currently using the location
|
||||||
|
*
|
||||||
|
* To guarantee DRF constraints, the following conditions must hold
|
||||||
|
* - all locations in (1) have exactly 1 owner
|
||||||
|
* - all locations in (2) have exactly 0 owner
|
||||||
|
* - all locations in (3) have at least 1 owner
|
||||||
|
* - A LD request can randomly pick any location in (2) & (3)
|
||||||
|
* - A ST request can randomly pick any location in (2)
|
||||||
|
*
|
||||||
|
* loadStoreMap maintains all locations already selected for LDs/STs so far
|
||||||
|
*
|
||||||
|
* When endLocSelection is called (i.e., we've picked all locations for an
|
||||||
|
* episode), we need to move each selected location to its right partition.
|
||||||
|
* if LD_bit == 1 && ST_bit == 0 (i.e., picked for LDs), then move the
|
||||||
|
* location to (3) -> future LDs can pick it.
|
||||||
|
* if LD_bit == 0 && ST_bit == 1, then move the location to (1) -> NO future
|
||||||
|
* action can pick it until this episode is done.
|
||||||
|
* if LD_bit == 1 && ST_bit == 1, then move the location to (1) -> NO future
|
||||||
|
* action can pick it until this episode is done.
|
||||||
|
* clear the loadStoreMap
|
||||||
|
*/
|
||||||
|
|
||||||
|
class AddressManager
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
AddressManager(int n_atomic_locs, int numNormalLocsPerAtomic);
|
||||||
|
~AddressManager();
|
||||||
|
|
||||||
|
typedef int32_t Value;
|
||||||
|
typedef int32_t Location;
|
||||||
|
|
||||||
|
// return the unique address mapped to a location
|
||||||
|
Addr getAddress(Location loc);
|
||||||
|
// return a unique atomic location & start picking locations
|
||||||
|
Location getAtomicLoc();
|
||||||
|
// return a random location for LD
|
||||||
|
Location getLoadLoc(Location atomic_loc);
|
||||||
|
// return a random location for ST
|
||||||
|
Location getStoreLoc(Location atomic_loc);
|
||||||
|
// finish picking locations
|
||||||
|
void finishLocSelection(Location atomic_loc);
|
||||||
|
// an episode is done, release location I've picked
|
||||||
|
void releaseLocation(Location atomic_loc, Location loc);
|
||||||
|
// update a log table entry with a given set of values
|
||||||
|
void updateLogTable(Location loc, int threadId, int episodeId,
|
||||||
|
Value new_value, Tick curTick, int cuId = -1);
|
||||||
|
// return the current value in the log table
|
||||||
|
Value getLoggedValue(Location loc) const;
|
||||||
|
// validate atomic response
|
||||||
|
bool validateAtomicResp(Location loc, Value ret_val);
|
||||||
|
|
||||||
|
std::string printLastWriter(Location loc) const;
|
||||||
|
|
||||||
|
static const int INVALID_VALUE;
|
||||||
|
static const int INVALID_LOCATION;
|
||||||
|
|
||||||
|
private:
|
||||||
|
class LastWriter
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
LastWriter()
|
||||||
|
: threadId(-1), cuId(-1), episodeId(-1), value(0),
|
||||||
|
writeTick(0)
|
||||||
|
{ }
|
||||||
|
|
||||||
|
const std::string print() const
|
||||||
|
{
|
||||||
|
return "(GpuThread ID " + std::to_string(threadId) +
|
||||||
|
", CU ID " + std::to_string(cuId) +
|
||||||
|
", Episode ID " + std::to_string(episodeId) +
|
||||||
|
", Value " + std::to_string(value) +
|
||||||
|
", Tick " + std::to_string(writeTick) +
|
||||||
|
")";
|
||||||
|
}
|
||||||
|
|
||||||
|
void update(int _thread, int _cu, int _episode, Value _value,
|
||||||
|
Tick _tick)
|
||||||
|
{
|
||||||
|
threadId = _thread;
|
||||||
|
cuId = _cu;
|
||||||
|
episodeId = _episode;
|
||||||
|
value = _value;
|
||||||
|
writeTick = _tick;
|
||||||
|
}
|
||||||
|
|
||||||
|
Value getLastStoredValue() const { return value; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
int threadId;
|
||||||
|
int cuId;
|
||||||
|
int episodeId;
|
||||||
|
Value value;
|
||||||
|
Tick writeTick;
|
||||||
|
};
|
||||||
|
|
||||||
|
class AtomicStruct
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
AtomicStruct(Location atom_loc, Location loc_begin, Location loc_end);
|
||||||
|
~AtomicStruct();
|
||||||
|
|
||||||
|
// functions picking locations for LD/ST/ATOMIC ops
|
||||||
|
void startLocSelection();
|
||||||
|
Location getLoadLoc();
|
||||||
|
Location getStoreLoc();
|
||||||
|
void endLocSelection();
|
||||||
|
|
||||||
|
// an episode completed its actions
|
||||||
|
// return locations to their correct positions
|
||||||
|
void releaseLoc(Location loc);
|
||||||
|
// is the value what we expect?
|
||||||
|
bool isExpectedValue(Value val);
|
||||||
|
|
||||||
|
private:
|
||||||
|
Location atomicLoc;
|
||||||
|
Location locationBase;
|
||||||
|
|
||||||
|
// array storing all locations this structure is managing
|
||||||
|
Location* locArray;
|
||||||
|
int firstMark, secondMark;
|
||||||
|
int arraySize;
|
||||||
|
|
||||||
|
// a vector of location's properties
|
||||||
|
typedef std::pair<int, int> LocProperty;
|
||||||
|
typedef std::vector<LocProperty> LocPropTable;
|
||||||
|
LocPropTable locProps;
|
||||||
|
|
||||||
|
// a temporary map of location and its LD/ST selection
|
||||||
|
typedef std::pair<bool, bool> LdStBits;
|
||||||
|
typedef std::unordered_map<Location, LdStBits> LdStMap;
|
||||||
|
LdStMap loadStoreMap;
|
||||||
|
|
||||||
|
// number of atomic requests at this location so far
|
||||||
|
int requestCount;
|
||||||
|
// a set of expected values
|
||||||
|
// when we request the first n atomic ops, we expect to receive n
|
||||||
|
// return values from [0 .. n-1]
|
||||||
|
typedef std::unordered_set<Value> ExpectedValueSet;
|
||||||
|
ExpectedValueSet expectedValues;
|
||||||
|
|
||||||
|
// swap two locations in locArray
|
||||||
|
void swap(LocProperty& prop_1, LocProperty& prop_2);
|
||||||
|
|
||||||
|
bool inFirstRegion(int idx) const
|
||||||
|
{
|
||||||
|
return (idx >= 0 && idx < firstMark);
|
||||||
|
}
|
||||||
|
bool inSecondRegion(int idx) const
|
||||||
|
{
|
||||||
|
return (idx >= firstMark && idx < secondMark);
|
||||||
|
}
|
||||||
|
bool inThirdRegion(int idx) const
|
||||||
|
{
|
||||||
|
return (idx >= secondMark && idx < arraySize);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// number of atomic locations
|
||||||
|
int numAtomicLocs;
|
||||||
|
// number of normal/non-atomic locations per atomic structure
|
||||||
|
int numLocsPerAtomic;
|
||||||
|
// total number of non-atomic locations
|
||||||
|
int numNormalLocs;
|
||||||
|
|
||||||
|
// location - address mapping
|
||||||
|
typedef std::vector<Addr> AddressMap;
|
||||||
|
AddressMap randAddressMap;
|
||||||
|
|
||||||
|
// a list of atomic structures
|
||||||
|
typedef std::vector<AtomicStruct*> AtomicStructTable;
|
||||||
|
AtomicStructTable atomicStructs;
|
||||||
|
|
||||||
|
// internal log table
|
||||||
|
typedef std::vector<LastWriter*> LogTable;
|
||||||
|
LogTable logTable;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_ */
|
||||||
123
src/cpu/testers/gpu_ruby_test/cpu_thread.cc
Normal file
123
src/cpu/testers/gpu_ruby_test/cpu_thread.cc
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cpu/testers/gpu_ruby_test/cpu_thread.hh"
|
||||||
|
|
||||||
|
#include "debug/ProtocolTest.hh"
|
||||||
|
|
||||||
|
CpuThread::CpuThread(const Params &p)
|
||||||
|
:GpuThread(p)
|
||||||
|
{
|
||||||
|
threadName = "CpuThread(Thread ID " + std::to_string(threadId) + ")";
|
||||||
|
threadEvent.setDesc("CpuThread tick");
|
||||||
|
assert(numLanes == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
CpuThread*
|
||||||
|
CpuThreadParams::create() const
|
||||||
|
{
|
||||||
|
return new CpuThread(*this);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
CpuThread::issueLoadOps()
|
||||||
|
{
|
||||||
|
assert(curAction);
|
||||||
|
assert(curAction->getType() == Episode::Action::Type::LOAD);
|
||||||
|
// we should not have any outstanding fence or atomic op at this point
|
||||||
|
assert(pendingFenceCount == 0);
|
||||||
|
assert(pendingAtomicCount == 0);
|
||||||
|
|
||||||
|
fatal("CpuThread::issueLoadOps - not yet implemented");
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
CpuThread::issueStoreOps()
|
||||||
|
{
|
||||||
|
assert(curAction);
|
||||||
|
assert(curAction->getType() == Episode::Action::Type::STORE);
|
||||||
|
// we should not have any outstanding fence or atomic op at this point
|
||||||
|
assert(pendingFenceCount == 0);
|
||||||
|
assert(pendingAtomicCount == 0);
|
||||||
|
|
||||||
|
fatal("CpuThread::issueStoreOps - not yet implemented");
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
CpuThread::issueAtomicOps()
|
||||||
|
{
|
||||||
|
assert(curAction);
|
||||||
|
assert(curAction->getType() == Episode::Action::Type::ATOMIC);
|
||||||
|
// we should not have any outstanding ops at this point
|
||||||
|
assert(pendingFenceCount == 0);
|
||||||
|
assert(pendingLdStCount == 0);
|
||||||
|
assert(pendingAtomicCount == 0);
|
||||||
|
|
||||||
|
fatal("CpuThread::issueAtomicOps - not yet implemented");
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
CpuThread::issueAcquireOp()
|
||||||
|
{
|
||||||
|
DPRINTF(ProtocolTest, "Issuing Acquire Op ...\n");
|
||||||
|
|
||||||
|
assert(curAction);
|
||||||
|
assert(curAction->getType() == Episode::Action::Type::ACQUIRE);
|
||||||
|
// we should not have any outstanding ops at this point
|
||||||
|
assert(pendingFenceCount == 0);
|
||||||
|
assert(pendingLdStCount == 0);
|
||||||
|
assert(pendingAtomicCount == 0);
|
||||||
|
|
||||||
|
// no-op: Acquire does not apply to CPU threads
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
CpuThread::issueReleaseOp()
|
||||||
|
{
|
||||||
|
DPRINTF(ProtocolTest, "Issuing Release Op ...\n");
|
||||||
|
|
||||||
|
assert(curAction);
|
||||||
|
assert(curAction->getType() == Episode::Action::Type::RELEASE);
|
||||||
|
// we should not have any outstanding ops at this point
|
||||||
|
assert(pendingFenceCount == 0);
|
||||||
|
assert(pendingLdStCount == 0);
|
||||||
|
assert(pendingAtomicCount == 0);
|
||||||
|
|
||||||
|
// no-op: Release does not apply to CPU threads
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
CpuThread::hitCallback(PacketPtr pkt)
|
||||||
|
{
|
||||||
|
fatal("CpuThread::hitCallback - not yet implemented");
|
||||||
|
}
|
||||||
61
src/cpu/testers/gpu_ruby_test/cpu_thread.hh
Normal file
61
src/cpu/testers/gpu_ruby_test/cpu_thread.hh
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_
|
||||||
|
#define CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_
|
||||||
|
|
||||||
|
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
|
||||||
|
#include "params/CpuThread.hh"
|
||||||
|
#include "sim/clocked_object.hh"
|
||||||
|
|
||||||
|
class CpuThread : public GpuThread
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef CpuThreadParams Params;
|
||||||
|
CpuThread(const Params &p);
|
||||||
|
virtual ~CpuThread() = default;
|
||||||
|
|
||||||
|
typedef AddressManager::Location Location;
|
||||||
|
typedef AddressManager::Value Value;
|
||||||
|
|
||||||
|
void hitCallback(PacketPtr pkt);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void issueLoadOps();
|
||||||
|
void issueStoreOps();
|
||||||
|
void issueAtomicOps();
|
||||||
|
void issueAcquireOp();
|
||||||
|
void issueReleaseOp();
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_ */
|
||||||
321
src/cpu/testers/gpu_ruby_test/episode.cc
Normal file
321
src/cpu/testers/gpu_ruby_test/episode.cc
Normal file
@@ -0,0 +1,321 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cpu/testers/gpu_ruby_test/episode.hh"
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
#include <unordered_set>
|
||||||
|
|
||||||
|
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
|
||||||
|
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
|
||||||
|
|
||||||
|
Episode::Episode(ProtocolTester* _tester, GpuThread* _thread, int num_loads,
|
||||||
|
int num_stores)
|
||||||
|
: tester(_tester),
|
||||||
|
thread(_thread),
|
||||||
|
numLoads(num_loads),
|
||||||
|
numStores(num_stores),
|
||||||
|
nextActionIdx(0)
|
||||||
|
{
|
||||||
|
assert(tester && thread);
|
||||||
|
|
||||||
|
episodeId = tester->getNextEpisodeID();
|
||||||
|
numLanes = thread->getNumLanes();
|
||||||
|
assert(numLanes > 0);
|
||||||
|
|
||||||
|
addrManager = tester->getAddressManager();
|
||||||
|
assert(addrManager);
|
||||||
|
|
||||||
|
atomicLocs.resize(numLanes, AddressManager::INVALID_LOCATION);
|
||||||
|
// generate a sequence of actions
|
||||||
|
initActions();
|
||||||
|
isActive = true;
|
||||||
|
|
||||||
|
DPRINTFN("Episode %d\n", episodeId);
|
||||||
|
}
|
||||||
|
|
||||||
|
Episode::~Episode()
|
||||||
|
{
|
||||||
|
for (Episode::Action* action : actions) {
|
||||||
|
assert(action);
|
||||||
|
delete action;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const Episode::Action*
|
||||||
|
Episode::peekCurAction() const
|
||||||
|
{
|
||||||
|
if (nextActionIdx < actions.size())
|
||||||
|
return actions[nextActionIdx];
|
||||||
|
else
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Episode::popAction()
|
||||||
|
{
|
||||||
|
assert(nextActionIdx < actions.size());
|
||||||
|
nextActionIdx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Episode::initActions()
|
||||||
|
{
|
||||||
|
// first, push Atomic & then Acquire action
|
||||||
|
actions.push_back(new Action(Action::Type::ATOMIC, numLanes));
|
||||||
|
actions.push_back(new Action(Action::Type::ACQUIRE, numLanes));
|
||||||
|
|
||||||
|
// second, push a number of LD/ST actions
|
||||||
|
int num_loads = numLoads;
|
||||||
|
int num_stores = numStores;
|
||||||
|
while ((num_loads + num_stores) > 0) {
|
||||||
|
switch (random() % 2) {
|
||||||
|
case 0: // Load
|
||||||
|
if (num_loads > 0) {
|
||||||
|
actions.push_back(new Action(Action::Type::LOAD,
|
||||||
|
numLanes));
|
||||||
|
num_loads--;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 1: // Store
|
||||||
|
if (num_stores > 0) {
|
||||||
|
actions.push_back(new Action(Action::Type::STORE,
|
||||||
|
numLanes));
|
||||||
|
num_stores--;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// last, push an Release & then Atomic action
|
||||||
|
actions.push_back(new Action(Action::Type::RELEASE, numLanes));
|
||||||
|
actions.push_back(new Action(Action::Type::ATOMIC, numLanes));
|
||||||
|
|
||||||
|
// for each lane, pick a list of locations
|
||||||
|
Location normal_loc;
|
||||||
|
|
||||||
|
for (int lane = 0; lane < numLanes; ++lane) {
|
||||||
|
normal_loc = AddressManager::INVALID_LOCATION;
|
||||||
|
|
||||||
|
// first, we select atomic loc for this lane
|
||||||
|
// atomic loc for this lane should not have been picked yet
|
||||||
|
assert(atomicLocs[lane] == AddressManager::INVALID_LOCATION);
|
||||||
|
// pick randomly an atomic location
|
||||||
|
atomicLocs[lane] = addrManager->getAtomicLoc();
|
||||||
|
assert(atomicLocs[lane] >= 0);
|
||||||
|
|
||||||
|
// go through each action in this lane and set its location
|
||||||
|
for (Action* action : actions) {
|
||||||
|
assert(action);
|
||||||
|
|
||||||
|
switch (action->getType()) {
|
||||||
|
case Action::Type::ATOMIC:
|
||||||
|
action->setLocation(lane, atomicLocs[lane]);
|
||||||
|
break;
|
||||||
|
case Action::Type::LOAD:
|
||||||
|
// pick randomly a normal location
|
||||||
|
normal_loc = addrManager->
|
||||||
|
getLoadLoc(atomicLocs[lane]);
|
||||||
|
assert(normal_loc >= AddressManager::INVALID_LOCATION);
|
||||||
|
|
||||||
|
if (normal_loc != AddressManager::INVALID_LOCATION) {
|
||||||
|
// check DRF
|
||||||
|
if (!tester->checkDRF(atomicLocs[lane],
|
||||||
|
normal_loc, false) ||
|
||||||
|
!this->checkDRF(atomicLocs[lane], normal_loc,
|
||||||
|
false, lane)) {
|
||||||
|
panic("GpuTh %d - Data race detected. STOPPED!\n",
|
||||||
|
thread->getGpuThreadId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
action->setLocation(lane, normal_loc);
|
||||||
|
break;
|
||||||
|
case Action::Type::STORE:
|
||||||
|
// pick randomly a normal location
|
||||||
|
normal_loc = addrManager->
|
||||||
|
getStoreLoc(atomicLocs[lane]);
|
||||||
|
assert(normal_loc >= AddressManager::INVALID_LOCATION);
|
||||||
|
|
||||||
|
if (normal_loc != AddressManager::INVALID_LOCATION) {
|
||||||
|
// check DRF
|
||||||
|
if (!tester->checkDRF(atomicLocs[lane],
|
||||||
|
normal_loc, true) ||
|
||||||
|
!this->checkDRF(atomicLocs[lane], normal_loc,
|
||||||
|
true, lane)) {
|
||||||
|
panic("GpuTh %d - Data race detected. STOPPED!\n",
|
||||||
|
thread->getGpuThreadId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
action->setLocation(lane, normal_loc);
|
||||||
|
break;
|
||||||
|
case Action::Type::ACQUIRE:
|
||||||
|
case Action::Type::RELEASE:
|
||||||
|
// no op
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
panic("Invalid action type\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
addrManager->finishLocSelection(atomicLocs[lane]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Episode::completeEpisode()
|
||||||
|
{
|
||||||
|
// release all locations this episode has picked and used
|
||||||
|
Location atomic_loc, normal_loc;
|
||||||
|
for (int lane = 0; lane < numLanes; ++lane) {
|
||||||
|
atomic_loc = AddressManager::INVALID_LOCATION;
|
||||||
|
normal_loc = AddressManager::INVALID_LOCATION;
|
||||||
|
|
||||||
|
std::unordered_set<Location> unique_loc_set;
|
||||||
|
|
||||||
|
for (Action* action : actions) {
|
||||||
|
assert(action);
|
||||||
|
|
||||||
|
if (action->isAtomicAction()) {
|
||||||
|
if (atomic_loc == AddressManager::INVALID_LOCATION) {
|
||||||
|
atomic_loc = action->getLocation(lane);
|
||||||
|
} else {
|
||||||
|
// both atomic ops in the same lane must be
|
||||||
|
// at the same location
|
||||||
|
assert(atomic_loc == action->getLocation(lane));
|
||||||
|
}
|
||||||
|
} else if (!action->isMemFenceAction()) {
|
||||||
|
assert(atomic_loc >= 0);
|
||||||
|
normal_loc = action->getLocation(lane);
|
||||||
|
|
||||||
|
if (normal_loc >= 0)
|
||||||
|
unique_loc_set.insert(normal_loc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// each unique loc can be released only once
|
||||||
|
for (Location loc : unique_loc_set)
|
||||||
|
addrManager->releaseLocation(atomic_loc, loc);
|
||||||
|
}
|
||||||
|
|
||||||
|
// this episode is no longer active
|
||||||
|
isActive = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
Episode::checkDRF(Location atomic_loc, Location loc, bool isStore,
|
||||||
|
int max_lane) const
|
||||||
|
{
|
||||||
|
assert(atomic_loc != AddressManager::INVALID_LOCATION);
|
||||||
|
assert(loc != AddressManager::INVALID_LOCATION);
|
||||||
|
assert(max_lane <= numLanes);
|
||||||
|
|
||||||
|
for (int lane = 0; lane < max_lane; ++lane) {
|
||||||
|
if (atomic_loc == atomicLocs[lane]) {
|
||||||
|
for (const Action* action : actions) {
|
||||||
|
if (!action->isAtomicAction() &&
|
||||||
|
!action->isMemFenceAction()) {
|
||||||
|
if (isStore && loc == action->getLocation(lane)) {
|
||||||
|
warn("ST at location %d races against thread %d\n",
|
||||||
|
loc, thread->getGpuThreadId());
|
||||||
|
return false;
|
||||||
|
} else if (!isStore &&
|
||||||
|
action->getType() == Action::Type::STORE &&
|
||||||
|
loc == action->getLocation(lane)) {
|
||||||
|
warn("LD at location %d races against thread %d\n",
|
||||||
|
loc, thread->getGpuThreadId());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------- Action class ----------------------------
|
||||||
|
Episode::Action::Action(Type t, int num_lanes)
|
||||||
|
: type(t),
|
||||||
|
numLanes(num_lanes)
|
||||||
|
{
|
||||||
|
assert(numLanes > 0);
|
||||||
|
locations.resize(numLanes);
|
||||||
|
for (Location &loc : locations) loc = AddressManager::INVALID_LOCATION;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Episode::Action::setLocation(int lane, Location loc)
|
||||||
|
{
|
||||||
|
assert(lane >= 0 && lane < numLanes);
|
||||||
|
locations[lane] = loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
AddressManager::Location
|
||||||
|
Episode::Action::getLocation(int lane) const
|
||||||
|
{
|
||||||
|
assert(lane >= 0 && lane < numLanes);
|
||||||
|
return locations[lane];
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
Episode::Action::isAtomicAction() const
|
||||||
|
{
|
||||||
|
return (type == Type::ATOMIC);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
Episode::Action::isMemFenceAction() const
|
||||||
|
{
|
||||||
|
return (type == Type::ACQUIRE || type == Type::RELEASE);
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string
|
||||||
|
Episode::Action::printType() const
|
||||||
|
{
|
||||||
|
if (type == Type::ACQUIRE)
|
||||||
|
return "ACQUIRE";
|
||||||
|
else if (type == Type::RELEASE)
|
||||||
|
return "RELEASE";
|
||||||
|
else if (type == Type::ATOMIC)
|
||||||
|
return "ATOMIC";
|
||||||
|
else if (type == Type::LOAD)
|
||||||
|
return "LOAD";
|
||||||
|
else if (type == Type::STORE)
|
||||||
|
return "STORE";
|
||||||
|
else
|
||||||
|
panic("Invalid action type\n");
|
||||||
|
}
|
||||||
126
src/cpu/testers/gpu_ruby_test/episode.hh
Normal file
126
src/cpu/testers/gpu_ruby_test/episode.hh
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_
|
||||||
|
#define CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
|
||||||
|
|
||||||
|
class ProtocolTester;
|
||||||
|
class GpuThread;
|
||||||
|
|
||||||
|
class Episode
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef AddressManager::Location Location;
|
||||||
|
typedef AddressManager::Value Value;
|
||||||
|
|
||||||
|
class Action {
|
||||||
|
public:
|
||||||
|
enum class Type {
|
||||||
|
ACQUIRE,
|
||||||
|
RELEASE,
|
||||||
|
ATOMIC,
|
||||||
|
LOAD,
|
||||||
|
STORE,
|
||||||
|
};
|
||||||
|
|
||||||
|
Action(Type t, int num_lanes);
|
||||||
|
~Action() {}
|
||||||
|
|
||||||
|
Type getType() const { return type; }
|
||||||
|
void setLocation(int lane, Location loc);
|
||||||
|
Location getLocation(int lane) const;
|
||||||
|
bool isAtomicAction() const;
|
||||||
|
bool isMemFenceAction() const;
|
||||||
|
const std::string printType() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Type type;
|
||||||
|
int numLanes;
|
||||||
|
typedef std::vector<Location> LocationList;
|
||||||
|
LocationList locations;
|
||||||
|
};
|
||||||
|
|
||||||
|
Episode(ProtocolTester* tester, GpuThread* thread, int num_loads,
|
||||||
|
int num_stores);
|
||||||
|
~Episode();
|
||||||
|
|
||||||
|
// return episode id
|
||||||
|
int getEpisodeId() const { return episodeId; }
|
||||||
|
// return the action at the head of the action queue
|
||||||
|
const Action* peekCurAction() const;
|
||||||
|
// pop the action at the head of the action queue
|
||||||
|
void popAction();
|
||||||
|
// check if there is more action to be issued in this episode
|
||||||
|
bool hasMoreActions() const { return nextActionIdx < actions.size();}
|
||||||
|
// complete this episode by releasing all locations & updating st effects
|
||||||
|
void completeEpisode();
|
||||||
|
// check if this episode is executing
|
||||||
|
bool isEpsActive() const { return isActive; }
|
||||||
|
// check if the input episode and this one have any data race
|
||||||
|
bool checkDRF(Location atomic_loc, Location loc, bool isStore,
|
||||||
|
int max_lane) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// pointers to tester, thread and address amanger structures
|
||||||
|
ProtocolTester *tester;
|
||||||
|
GpuThread *thread;
|
||||||
|
AddressManager *addrManager;
|
||||||
|
|
||||||
|
// a unique episode id
|
||||||
|
int episodeId;
|
||||||
|
// list of actions in this episode
|
||||||
|
typedef std::vector<Action*> ActionList;
|
||||||
|
ActionList actions;
|
||||||
|
// list of atomic locations picked for this episode
|
||||||
|
typedef std::vector<Location> AtomicLocationList;
|
||||||
|
AtomicLocationList atomicLocs;
|
||||||
|
|
||||||
|
// is a thread running this episode?
|
||||||
|
bool isActive;
|
||||||
|
// episode length = num_loads + num_stores
|
||||||
|
int numLoads;
|
||||||
|
int numStores;
|
||||||
|
// index of the next action in actions
|
||||||
|
int nextActionIdx;
|
||||||
|
// number of lanes in this thread
|
||||||
|
int numLanes;
|
||||||
|
|
||||||
|
// randomly generate actions in this episode
|
||||||
|
void initActions();
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_ */
|
||||||
430
src/cpu/testers/gpu_ruby_test/gpu_thread.cc
Normal file
430
src/cpu/testers/gpu_ruby_test/gpu_thread.cc
Normal file
@@ -0,0 +1,430 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
#include "debug/ProtocolTest.hh"
|
||||||
|
|
||||||
|
GpuThread::GpuThread(const Params &p)
|
||||||
|
: ClockedObject(p),
|
||||||
|
threadEvent(this, "GpuThread tick"),
|
||||||
|
deadlockCheckEvent(this),
|
||||||
|
threadId(p.thread_id),
|
||||||
|
numLanes(p.num_lanes),
|
||||||
|
tester(nullptr), addrManager(nullptr), port(nullptr),
|
||||||
|
scalarPort(nullptr), sqcPort(nullptr), curEpisode(nullptr),
|
||||||
|
curAction(nullptr), pendingLdStCount(0), pendingFenceCount(0),
|
||||||
|
pendingAtomicCount(0), lastActiveCycle(Cycles(0)),
|
||||||
|
deadlockThreshold(p.deadlock_threshold)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
GpuThread::~GpuThread()
|
||||||
|
{
|
||||||
|
for (auto ep : episodeHistory) {
|
||||||
|
assert(ep != nullptr);
|
||||||
|
delete ep;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuThread::wakeup()
|
||||||
|
{
|
||||||
|
// this thread is waken up by one of the following events
|
||||||
|
// - hitCallback is called
|
||||||
|
// - a new episode is created
|
||||||
|
|
||||||
|
// check if this is the first episode in this thread
|
||||||
|
if (curEpisode == nullptr) {
|
||||||
|
issueNewEpisode();
|
||||||
|
assert(curEpisode);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isNextActionReady()) {
|
||||||
|
// isNextActionReady should check if the action list is empty
|
||||||
|
assert(curAction != nullptr);
|
||||||
|
|
||||||
|
// issue the next action
|
||||||
|
issueNextAction();
|
||||||
|
} else {
|
||||||
|
// check for completion of the current episode
|
||||||
|
// completion = no outstanding requests + not having more actions
|
||||||
|
if (!curEpisode->hasMoreActions() &&
|
||||||
|
pendingLdStCount == 0 &&
|
||||||
|
pendingFenceCount == 0 &&
|
||||||
|
pendingAtomicCount == 0) {
|
||||||
|
|
||||||
|
curEpisode->completeEpisode();
|
||||||
|
|
||||||
|
// check if it's time to stop the tester
|
||||||
|
if (tester->checkExit()) {
|
||||||
|
// no more event is scheduled for this thread
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// issue the next episode
|
||||||
|
issueNewEpisode();
|
||||||
|
assert(curEpisode);
|
||||||
|
|
||||||
|
// now we get a new episode
|
||||||
|
// let's wake up the thread in the next cycle
|
||||||
|
if (!threadEvent.scheduled()) {
|
||||||
|
scheduleWakeup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuThread::scheduleWakeup()
|
||||||
|
{
|
||||||
|
assert(!threadEvent.scheduled());
|
||||||
|
schedule(threadEvent, nextCycle());
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuThread::scheduleDeadlockCheckEvent()
|
||||||
|
{
|
||||||
|
// after this first schedule, the deadlock event is scheduled by itself
|
||||||
|
assert(!deadlockCheckEvent.scheduled());
|
||||||
|
schedule(deadlockCheckEvent, nextCycle());
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuThread::attachGpuThreadToPorts(ProtocolTester *_tester,
|
||||||
|
ProtocolTester::SeqPort *_port,
|
||||||
|
ProtocolTester::SeqPort *_scalarPort,
|
||||||
|
ProtocolTester::SeqPort *_sqcPort)
|
||||||
|
{
|
||||||
|
tester = _tester;
|
||||||
|
port = _port;
|
||||||
|
scalarPort = _scalarPort;
|
||||||
|
sqcPort = _sqcPort;
|
||||||
|
|
||||||
|
assert(tester && port);
|
||||||
|
addrManager = tester->getAddressManager();
|
||||||
|
assert(addrManager);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuThread::issueNewEpisode()
|
||||||
|
{
|
||||||
|
int num_reg_loads = random() % tester->getEpisodeLength();
|
||||||
|
int num_reg_stores = tester->getEpisodeLength() - num_reg_loads;
|
||||||
|
|
||||||
|
// create a new episode
|
||||||
|
curEpisode = new Episode(tester, this, num_reg_loads, num_reg_stores);
|
||||||
|
episodeHistory.push_back(curEpisode);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
GpuThread::isNextActionReady()
|
||||||
|
{
|
||||||
|
if (!curEpisode->hasMoreActions()) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
curAction = curEpisode->peekCurAction();
|
||||||
|
|
||||||
|
switch(curAction->getType()) {
|
||||||
|
case Episode::Action::Type::ATOMIC:
|
||||||
|
// an atomic action must wait for all previous requests
|
||||||
|
// to complete
|
||||||
|
if (pendingLdStCount == 0 &&
|
||||||
|
pendingFenceCount == 0 &&
|
||||||
|
pendingAtomicCount == 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
case Episode::Action::Type::ACQUIRE:
|
||||||
|
// we should not see any outstanding ld_st or fence here
|
||||||
|
assert(pendingLdStCount == 0 &&
|
||||||
|
pendingFenceCount == 0);
|
||||||
|
|
||||||
|
// an acquire action must wait for all previous atomic
|
||||||
|
// requests to complete
|
||||||
|
if (pendingAtomicCount == 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
case Episode::Action::Type::RELEASE:
|
||||||
|
// we should not see any outstanding atomic or fence here
|
||||||
|
assert(pendingAtomicCount == 0 &&
|
||||||
|
pendingFenceCount == 0);
|
||||||
|
|
||||||
|
// a release action must wait for all previous ld/st
|
||||||
|
// requests to complete
|
||||||
|
if (pendingLdStCount == 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
case Episode::Action::Type::LOAD:
|
||||||
|
case Episode::Action::Type::STORE:
|
||||||
|
// we should not see any outstanding atomic here
|
||||||
|
assert(pendingAtomicCount == 0);
|
||||||
|
|
||||||
|
// can't issue if there is a pending fence
|
||||||
|
if (pendingFenceCount > 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// a Load or Store is ready if it doesn't overlap
|
||||||
|
// with any outstanding request
|
||||||
|
for (int lane = 0; lane < numLanes; ++lane) {
|
||||||
|
Location loc = curAction->getLocation(lane);
|
||||||
|
|
||||||
|
if (loc != AddressManager::INVALID_LOCATION) {
|
||||||
|
Addr addr = addrManager->getAddress(loc);
|
||||||
|
|
||||||
|
if (outstandingLoads.find(addr) !=
|
||||||
|
outstandingLoads.end()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (outstandingStores.find(addr) !=
|
||||||
|
outstandingStores.end()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (outstandingAtomics.find(addr) !=
|
||||||
|
outstandingAtomics.end()) {
|
||||||
|
// this is not an atomic action, so the address
|
||||||
|
// should not be in outstandingAtomics list
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
panic("The tester got an invalid action\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuThread::issueNextAction()
|
||||||
|
{
|
||||||
|
switch(curAction->getType()) {
|
||||||
|
case Episode::Action::Type::ATOMIC:
|
||||||
|
issueAtomicOps();
|
||||||
|
break;
|
||||||
|
case Episode::Action::Type::ACQUIRE:
|
||||||
|
issueAcquireOp();
|
||||||
|
break;
|
||||||
|
case Episode::Action::Type::RELEASE:
|
||||||
|
issueReleaseOp();
|
||||||
|
break;
|
||||||
|
case Episode::Action::Type::LOAD:
|
||||||
|
issueLoadOps();
|
||||||
|
break;
|
||||||
|
case Episode::Action::Type::STORE:
|
||||||
|
issueStoreOps();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
panic("The tester got an invalid action\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// the current action has been issued, pop it from the action list
|
||||||
|
curEpisode->popAction();
|
||||||
|
lastActiveCycle = curCycle();
|
||||||
|
|
||||||
|
// we may be able to schedule the next action
|
||||||
|
// just wake up this thread in the next cycle
|
||||||
|
if (!threadEvent.scheduled()) {
|
||||||
|
scheduleWakeup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuThread::addOutstandingReqs(OutstandingReqTable& req_table, Addr address,
|
||||||
|
int lane, Location loc, Value stored_val)
|
||||||
|
{
|
||||||
|
OutstandingReqTable::iterator it = req_table.find(address);
|
||||||
|
OutstandingReq req(lane, loc, stored_val, curCycle());
|
||||||
|
|
||||||
|
if (it == req_table.end()) {
|
||||||
|
// insert a new list of requests for this address
|
||||||
|
req_table.insert(std::pair<Addr, OutstandingReqList>(address,
|
||||||
|
OutstandingReqList(1, req)));
|
||||||
|
} else {
|
||||||
|
// add a new request
|
||||||
|
(it->second).push_back(req);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
GpuThread::OutstandingReq
|
||||||
|
GpuThread::popOutstandingReq(OutstandingReqTable& req_table, Addr addr)
|
||||||
|
{
|
||||||
|
OutstandingReqTable::iterator it = req_table.find(addr);
|
||||||
|
|
||||||
|
// there must be exactly one list of requests for this address in the table
|
||||||
|
assert(it != req_table.end());
|
||||||
|
|
||||||
|
// get the request list
|
||||||
|
OutstandingReqList& req_list = it->second;
|
||||||
|
assert(!req_list.empty());
|
||||||
|
|
||||||
|
// save a request
|
||||||
|
OutstandingReq ret_req = req_list.back();
|
||||||
|
|
||||||
|
// remove the request from the list
|
||||||
|
req_list.pop_back();
|
||||||
|
|
||||||
|
// if the list is now empty, remove it from req_table
|
||||||
|
if (req_list.empty()) {
|
||||||
|
req_table.erase(it);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret_req;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuThread::validateAtomicResp(Location loc, int lane, Value ret_val)
|
||||||
|
{
|
||||||
|
if (!addrManager->validateAtomicResp(loc, ret_val)) {
|
||||||
|
std::stringstream ss;
|
||||||
|
Addr addr = addrManager->getAddress(loc);
|
||||||
|
|
||||||
|
// basic info
|
||||||
|
ss << threadName << ": Atomic Op returned unexpected value\n"
|
||||||
|
<< "\tEpisode " << curEpisode->getEpisodeId() << "\n"
|
||||||
|
<< "\tLane ID " << lane << "\n"
|
||||||
|
<< "\tAddress " << printAddress(addr) << "\n"
|
||||||
|
<< "\tAtomic Op's return value " << ret_val << "\n";
|
||||||
|
|
||||||
|
// print out basic info
|
||||||
|
warn("%s\n", ss.str());
|
||||||
|
|
||||||
|
// TODO add more detailed info
|
||||||
|
|
||||||
|
// dump all error info and exit the simulation
|
||||||
|
tester->dumpErrorLog(ss);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuThread::validateLoadResp(Location loc, int lane, Value ret_val)
|
||||||
|
{
|
||||||
|
if (ret_val != addrManager->getLoggedValue(loc)) {
|
||||||
|
std::stringstream ss;
|
||||||
|
Addr addr = addrManager->getAddress(loc);
|
||||||
|
|
||||||
|
// basic info
|
||||||
|
ss << threadName << ": Loaded value is not consistent with "
|
||||||
|
<< "the last stored value\n"
|
||||||
|
<< "\tGpuThread " << threadId << "\n"
|
||||||
|
<< "\tEpisode " << curEpisode->getEpisodeId() << "\n"
|
||||||
|
<< "\tLane ID " << lane << "\n"
|
||||||
|
<< "\tAddress " << printAddress(addr) << "\n"
|
||||||
|
<< "\tLoaded value " << ret_val << "\n"
|
||||||
|
<< "\tLast writer " << addrManager->printLastWriter(loc) << "\n";
|
||||||
|
|
||||||
|
// print out basic info
|
||||||
|
warn("%s\n", ss.str());
|
||||||
|
|
||||||
|
// TODO add more detailed info
|
||||||
|
|
||||||
|
// dump all error info and exit the simulation
|
||||||
|
tester->dumpErrorLog(ss);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
GpuThread::checkDRF(Location atomic_loc, Location loc, bool isStore) const
|
||||||
|
{
|
||||||
|
if (curEpisode && curEpisode->isEpsActive()) {
|
||||||
|
// check against the current episode this thread is executing
|
||||||
|
return curEpisode->checkDRF(atomic_loc, loc, isStore, numLanes);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuThread::checkDeadlock()
|
||||||
|
{
|
||||||
|
if ((curCycle() - lastActiveCycle) > deadlockThreshold) {
|
||||||
|
// deadlock detected
|
||||||
|
std::stringstream ss;
|
||||||
|
|
||||||
|
ss << threadName << ": Deadlock detected\n"
|
||||||
|
<< "\tLast active cycle: " << lastActiveCycle << "\n"
|
||||||
|
<< "\tCurrent cycle: " << curCycle() << "\n"
|
||||||
|
<< "\tDeadlock threshold: " << deadlockThreshold << "\n";
|
||||||
|
|
||||||
|
// print out basic info
|
||||||
|
warn("%s\n", ss.str());
|
||||||
|
|
||||||
|
// dump all error info and exit the simulation
|
||||||
|
tester->dumpErrorLog(ss);
|
||||||
|
} else if (!tester->checkExit()) {
|
||||||
|
// schedule a future deadlock check event
|
||||||
|
assert(!deadlockCheckEvent.scheduled());
|
||||||
|
schedule(deadlockCheckEvent,
|
||||||
|
deadlockThreshold * clockPeriod() + curTick());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuThread::printOutstandingReqs(const OutstandingReqTable& table,
|
||||||
|
std::stringstream& ss) const
|
||||||
|
{
|
||||||
|
Cycles cur_cycle = curCycle();
|
||||||
|
|
||||||
|
for (const auto& m : table) {
|
||||||
|
for (const auto& req : m.second) {
|
||||||
|
ss << "\t\t\tAddr " << printAddress(m.first)
|
||||||
|
<< ": delta (curCycle - issueCycle) = "
|
||||||
|
<< (cur_cycle - req.issueCycle) << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuThread::printAllOutstandingReqs(std::stringstream& ss) const
|
||||||
|
{
|
||||||
|
// dump all outstanding requests of this thread
|
||||||
|
ss << "\t\tOutstanding Loads:\n";
|
||||||
|
printOutstandingReqs(outstandingLoads, ss);
|
||||||
|
ss << "\t\tOutstanding Stores:\n";
|
||||||
|
printOutstandingReqs(outstandingStores, ss);
|
||||||
|
ss << "\t\tOutstanding Atomics:\n";
|
||||||
|
printOutstandingReqs(outstandingAtomics, ss);
|
||||||
|
ss << "\t\tNumber of outstanding acquires & releases: "
|
||||||
|
<< pendingFenceCount << std::endl;
|
||||||
|
}
|
||||||
199
src/cpu/testers/gpu_ruby_test/gpu_thread.hh
Normal file
199
src/cpu/testers/gpu_ruby_test/gpu_thread.hh
Normal file
@@ -0,0 +1,199 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GPU thread issues requests to and receives responses from Ruby memory
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_
|
||||||
|
#define CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_
|
||||||
|
|
||||||
|
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
|
||||||
|
#include "cpu/testers/gpu_ruby_test/episode.hh"
|
||||||
|
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
|
||||||
|
#include "gpu-compute/gpu_dyn_inst.hh"
|
||||||
|
#include "sim/clocked_object.hh"
|
||||||
|
|
||||||
|
class GpuThread : public ClockedObject
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef GpuThreadParams Params;
|
||||||
|
GpuThread(const Params &p);
|
||||||
|
virtual ~GpuThread();
|
||||||
|
|
||||||
|
typedef AddressManager::Location Location;
|
||||||
|
typedef AddressManager::Value Value;
|
||||||
|
|
||||||
|
void wakeup();
|
||||||
|
void scheduleWakeup();
|
||||||
|
void checkDeadlock();
|
||||||
|
void scheduleDeadlockCheckEvent();
|
||||||
|
|
||||||
|
void attachGpuThreadToPorts(ProtocolTester *_tester,
|
||||||
|
ProtocolTester::SeqPort *_port,
|
||||||
|
ProtocolTester::SeqPort *_sqcPort = nullptr,
|
||||||
|
ProtocolTester::SeqPort *_scalarPort = nullptr);
|
||||||
|
|
||||||
|
const std::string& getName() const { return threadName; }
|
||||||
|
|
||||||
|
// must be implemented by a child class
|
||||||
|
virtual void hitCallback(PacketPtr pkt) = 0;
|
||||||
|
|
||||||
|
int getGpuThreadId() const { return threadId; }
|
||||||
|
int getNumLanes() const { return numLanes; }
|
||||||
|
// check if the input location would satisfy DRF constraint
|
||||||
|
bool checkDRF(Location atomic_loc, Location loc, bool isStore) const;
|
||||||
|
|
||||||
|
void printAllOutstandingReqs(std::stringstream& ss) const;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
class GpuThreadEvent : public Event
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
GpuThread* thread;
|
||||||
|
std::string desc;
|
||||||
|
|
||||||
|
public:
|
||||||
|
GpuThreadEvent(GpuThread* _thread, std::string _description)
|
||||||
|
: Event(CPU_Tick_Pri), thread(_thread), desc(_description)
|
||||||
|
{}
|
||||||
|
void setDesc(std::string _description) { desc = _description; }
|
||||||
|
void process() { thread->wakeup(); }
|
||||||
|
const std::string name() { return desc; }
|
||||||
|
};
|
||||||
|
|
||||||
|
GpuThreadEvent threadEvent;
|
||||||
|
|
||||||
|
class DeadlockCheckEvent : public Event
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
GpuThread* thread;
|
||||||
|
|
||||||
|
public:
|
||||||
|
DeadlockCheckEvent(GpuThread* _thread)
|
||||||
|
: Event(CPU_Tick_Pri), thread(_thread)
|
||||||
|
{}
|
||||||
|
void process() { thread->checkDeadlock(); }
|
||||||
|
const std::string name() const { return "Tester deadlock check"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
DeadlockCheckEvent deadlockCheckEvent;
|
||||||
|
|
||||||
|
struct OutstandingReq
|
||||||
|
{
|
||||||
|
int lane;
|
||||||
|
Location origLoc;
|
||||||
|
Value storedValue;
|
||||||
|
Cycles issueCycle;
|
||||||
|
|
||||||
|
OutstandingReq(int _lane, Location _loc, Value _val, Cycles _cycle)
|
||||||
|
: lane(_lane), origLoc(_loc), storedValue(_val), issueCycle(_cycle)
|
||||||
|
{}
|
||||||
|
|
||||||
|
~OutstandingReq()
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
// the unique global id of this thread
|
||||||
|
int threadId;
|
||||||
|
// width of this thread (1 for cpu thread & wf size for gpu wavefront)
|
||||||
|
int numLanes;
|
||||||
|
// thread name
|
||||||
|
std::string threadName;
|
||||||
|
// pointer to the main tester
|
||||||
|
ProtocolTester *tester;
|
||||||
|
// pointer to the address manager
|
||||||
|
AddressManager *addrManager;
|
||||||
|
|
||||||
|
ProtocolTester::SeqPort *port; // main data port (GPU-vector data)
|
||||||
|
ProtocolTester::SeqPort *scalarPort; // nullptr for CPU
|
||||||
|
ProtocolTester::SeqPort *sqcPort; // nullptr for CPU
|
||||||
|
|
||||||
|
// a list of issued episodes sorted by time
|
||||||
|
// the last episode in the list is the current episode
|
||||||
|
typedef std::vector<Episode*> EpisodeHistory;
|
||||||
|
EpisodeHistory episodeHistory;
|
||||||
|
// pointer to the current episode
|
||||||
|
Episode *curEpisode;
|
||||||
|
// pointer to the current action
|
||||||
|
const Episode::Action *curAction;
|
||||||
|
|
||||||
|
// number of outstanding requests that are waiting for their responses
|
||||||
|
int pendingLdStCount;
|
||||||
|
int pendingFenceCount;
|
||||||
|
int pendingAtomicCount;
|
||||||
|
|
||||||
|
// last cycle when there is an event in this thread
|
||||||
|
Cycles lastActiveCycle;
|
||||||
|
Cycles deadlockThreshold;
|
||||||
|
|
||||||
|
// a per-address list of outstanding requests
|
||||||
|
typedef std::vector<OutstandingReq> OutstandingReqList;
|
||||||
|
typedef std::unordered_map<Addr, OutstandingReqList> OutstandingReqTable;
|
||||||
|
OutstandingReqTable outstandingLoads;
|
||||||
|
OutstandingReqTable outstandingStores;
|
||||||
|
OutstandingReqTable outstandingAtomics;
|
||||||
|
|
||||||
|
void issueNewEpisode();
|
||||||
|
// check if the next action in the current episode satisfies all wait_cnt
|
||||||
|
// constraints and is ready to issue
|
||||||
|
bool isNextActionReady();
|
||||||
|
void issueNextAction();
|
||||||
|
|
||||||
|
// issue Ops to Ruby memory
|
||||||
|
// must be implemented by a child class
|
||||||
|
virtual void issueLoadOps() = 0;
|
||||||
|
virtual void issueStoreOps() = 0;
|
||||||
|
virtual void issueAtomicOps() = 0;
|
||||||
|
virtual void issueAcquireOp() = 0;
|
||||||
|
virtual void issueReleaseOp() = 0;
|
||||||
|
|
||||||
|
// add an outstanding request to its corresponding table
|
||||||
|
void addOutstandingReqs(OutstandingReqTable& req_table, Addr addr,
|
||||||
|
int lane, Location loc,
|
||||||
|
Value stored_val = AddressManager::INVALID_VALUE);
|
||||||
|
|
||||||
|
// pop an outstanding request from the input table
|
||||||
|
OutstandingReq popOutstandingReq(OutstandingReqTable& req_table,
|
||||||
|
Addr address);
|
||||||
|
|
||||||
|
// validate all atomic responses
|
||||||
|
void validateAtomicResp(Location loc, int lane, Value ret_val);
|
||||||
|
// validate all Load responses
|
||||||
|
void validateLoadResp(Location loc, int lane, Value ret_val);
|
||||||
|
|
||||||
|
void printOutstandingReqs(const OutstandingReqTable& table,
|
||||||
|
std::stringstream& ss) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_ */
|
||||||
377
src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc
Normal file
377
src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc
Normal file
@@ -0,0 +1,377 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
|
||||||
|
|
||||||
|
#include "debug/ProtocolTest.hh"
|
||||||
|
|
||||||
|
GpuWavefront::GpuWavefront(const Params &p)
|
||||||
|
: GpuThread(p), cuId(p.cu_id)
|
||||||
|
{
|
||||||
|
threadName = "GpuWavefront(GpuThread ID = " + std::to_string(threadId) +
|
||||||
|
", CU ID = " + std::to_string(cuId) + ")";
|
||||||
|
threadEvent.setDesc("GpuWavefront tick");
|
||||||
|
}
|
||||||
|
|
||||||
|
GpuWavefront::~GpuWavefront()
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
GpuWavefront*
|
||||||
|
GpuWavefrontParams::create() const
|
||||||
|
{
|
||||||
|
return new GpuWavefront(*this);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuWavefront::issueLoadOps()
|
||||||
|
{
|
||||||
|
assert(curAction);
|
||||||
|
assert(curAction->getType() == Episode::Action::Type::LOAD);
|
||||||
|
// we should not have any outstanding fence or atomic op at this point
|
||||||
|
assert(pendingFenceCount == 0);
|
||||||
|
assert(pendingAtomicCount == 0);
|
||||||
|
|
||||||
|
for (int lane = 0; lane < numLanes; ++lane) {
|
||||||
|
Location location = curAction->getLocation(lane);
|
||||||
|
assert(location >= AddressManager::INVALID_LOCATION);
|
||||||
|
|
||||||
|
// Make a request if we do not get an INVALID_LOCATION for this lane.
|
||||||
|
if (location >= 0) {
|
||||||
|
Addr address = addrManager->getAddress(location);
|
||||||
|
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
|
||||||
|
this->getName(), curEpisode->getEpisodeId(),
|
||||||
|
printAddress(address));
|
||||||
|
|
||||||
|
int load_size = sizeof(Value);
|
||||||
|
|
||||||
|
// for now, assert address is 4-byte aligned
|
||||||
|
assert(address % load_size == 0);
|
||||||
|
|
||||||
|
auto req = std::make_shared<Request>(address, load_size,
|
||||||
|
0, tester->requestorId(),
|
||||||
|
0, threadId, nullptr);
|
||||||
|
req->setPaddr(address);
|
||||||
|
req->setReqInstSeqNum(tester->getActionSeqNum());
|
||||||
|
// set protocol-specific flags
|
||||||
|
setExtraRequestFlags(req);
|
||||||
|
|
||||||
|
PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
|
||||||
|
uint8_t* data = new uint8_t[load_size];
|
||||||
|
pkt->dataDynamic(data);
|
||||||
|
pkt->senderState = new ProtocolTester::SenderState(this);
|
||||||
|
|
||||||
|
// increment the number of outstanding ld_st requests
|
||||||
|
pendingLdStCount++;
|
||||||
|
|
||||||
|
if (!port->sendTimingReq(pkt)) {
|
||||||
|
panic("Not expected failed sendTimingReq\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// insert an outstanding load
|
||||||
|
addOutstandingReqs(outstandingLoads, address, lane, location);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuWavefront::issueStoreOps()
|
||||||
|
{
|
||||||
|
assert(curAction);
|
||||||
|
assert(curAction->getType() == Episode::Action::Type::STORE);
|
||||||
|
// we should not have any outstanding fence or atomic op at this point
|
||||||
|
assert(pendingFenceCount == 0);
|
||||||
|
assert(pendingAtomicCount == 0);
|
||||||
|
|
||||||
|
for (int lane = 0; lane < numLanes; ++lane) {
|
||||||
|
Location location = curAction->getLocation(lane);
|
||||||
|
assert(location >= AddressManager::INVALID_LOCATION);
|
||||||
|
|
||||||
|
// Make a request if we do not get an INVALID_LOCATION for this lane.
|
||||||
|
if (location >= 0) {
|
||||||
|
// prepare the next value to store
|
||||||
|
Value new_value = addrManager->getLoggedValue(location) + 1;
|
||||||
|
|
||||||
|
Addr address = addrManager->getAddress(location);
|
||||||
|
// must be aligned with store size
|
||||||
|
assert(address % sizeof(Value) == 0);
|
||||||
|
|
||||||
|
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
|
||||||
|
"Value %d\n", this->getName(),
|
||||||
|
curEpisode->getEpisodeId(), printAddress(address),
|
||||||
|
new_value);
|
||||||
|
|
||||||
|
auto req = std::make_shared<Request>(address, sizeof(Value),
|
||||||
|
0, tester->requestorId(), 0,
|
||||||
|
threadId, nullptr);
|
||||||
|
req->setPaddr(address);
|
||||||
|
req->setReqInstSeqNum(tester->getActionSeqNum());
|
||||||
|
// set protocol-specific flags
|
||||||
|
setExtraRequestFlags(req);
|
||||||
|
|
||||||
|
PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
|
||||||
|
uint8_t *writeData = new uint8_t[sizeof(Value)];
|
||||||
|
for (int j = 0; j < sizeof(Value); ++j) {
|
||||||
|
writeData[j] = ((uint8_t*)&new_value)[j];
|
||||||
|
}
|
||||||
|
pkt->dataDynamic(writeData);
|
||||||
|
pkt->senderState = new ProtocolTester::SenderState(this);
|
||||||
|
|
||||||
|
// increment the number of outstanding ld_st requests
|
||||||
|
pendingLdStCount++;
|
||||||
|
|
||||||
|
if (!port->sendTimingReq(pkt)) {
|
||||||
|
panic("Not expecting a failed sendTimingReq\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// add an outstanding store
|
||||||
|
addOutstandingReqs(outstandingStores, address, lane, location,
|
||||||
|
new_value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuWavefront::issueAtomicOps()
|
||||||
|
{
|
||||||
|
assert(curAction);
|
||||||
|
assert(curAction->getType() == Episode::Action::Type::ATOMIC);
|
||||||
|
// we should not have any outstanding ops at this point
|
||||||
|
assert(pendingFenceCount == 0);
|
||||||
|
assert(pendingLdStCount == 0);
|
||||||
|
assert(pendingAtomicCount == 0);
|
||||||
|
|
||||||
|
// we use atomic_inc in the tester
|
||||||
|
Request::Flags flags = Request::ATOMIC_RETURN_OP;
|
||||||
|
|
||||||
|
for (int lane = 0; lane < numLanes; ++lane) {
|
||||||
|
Location location = curAction->getLocation(lane);
|
||||||
|
assert(location >= 0);
|
||||||
|
|
||||||
|
Addr address = addrManager->getAddress(location);
|
||||||
|
|
||||||
|
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n",
|
||||||
|
this->getName(), curEpisode->getEpisodeId(),
|
||||||
|
printAddress(address));
|
||||||
|
|
||||||
|
// must be aligned with store size
|
||||||
|
assert(address % sizeof(Value) == 0);
|
||||||
|
AtomicOpFunctor *amo_op = new AtomicOpInc<Value>();
|
||||||
|
auto req = std::make_shared<Request>(address, sizeof(Value),
|
||||||
|
flags, tester->requestorId(),
|
||||||
|
0, threadId,
|
||||||
|
AtomicOpFunctorPtr(amo_op));
|
||||||
|
req->setPaddr(address);
|
||||||
|
req->setReqInstSeqNum(tester->getActionSeqNum());
|
||||||
|
// set protocol-specific flags
|
||||||
|
setExtraRequestFlags(req);
|
||||||
|
|
||||||
|
PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
|
||||||
|
uint8_t* data = new uint8_t[sizeof(Value)];
|
||||||
|
pkt->dataDynamic(data);
|
||||||
|
pkt->senderState = new ProtocolTester::SenderState(this);
|
||||||
|
|
||||||
|
if (!port->sendTimingReq(pkt)) {
|
||||||
|
panic("Not expecting failed sendTimingReq\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// increment the number of outstanding atomic ops
|
||||||
|
pendingAtomicCount++;
|
||||||
|
|
||||||
|
// add an outstanding atomic
|
||||||
|
addOutstandingReqs(outstandingAtomics, address, lane, location);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Issue an acquire fence on behalf of this wavefront. An acquire is modeled
// as a zero-sized MemSyncReq packet tagged with Request::ACQUIRE; the
// response is counted against pendingFenceCount in hitCallback.
void
GpuWavefront::issueAcquireOp()
{
    DPRINTF(ProtocolTest, "%s Episode %d: Issuing Acquire\n", this->getName(),
            curEpisode->getEpisodeId());

    // the episode must currently be at an ACQUIRE action
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::ACQUIRE);
    // we should not have any outstanding ops at this point
    assert(pendingFenceCount == 0);
    assert(pendingLdStCount == 0);
    assert(pendingAtomicCount == 0);

    // zero-sized request at address 0: the fence carries no data
    auto acq_req = std::make_shared<Request>(0, 0, 0,
                                             tester->requestorId(), 0,
                                             threadId, nullptr);
    acq_req->setPaddr(0);
    // unique sequence number so the coalescer can distinguish this request
    acq_req->setReqInstSeqNum(tester->getActionSeqNum());
    acq_req->setFlags(Request::ACQUIRE);
    // set protocol-specific flags
    setExtraRequestFlags(acq_req);

    PacketPtr pkt = new Packet(acq_req, MemCmd::MemSyncReq);
    // remember the issuing thread so SeqPort::recvTimingResp can route the
    // response back to this wavefront
    pkt->senderState = new ProtocolTester::SenderState(this);

    // increment the number of outstanding fence requests
    pendingFenceCount++;

    // the tester's ports never apply back-pressure, so a failed send is a bug
    if (!port->sendTimingReq(pkt)) {
        panic("Not expecting failed sendTimingReq\n");
    }
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GpuWavefront::issueReleaseOp()
|
||||||
|
{
|
||||||
|
DPRINTF(ProtocolTest, "%s Episode %d: Issuing Release\n", this->getName(),
|
||||||
|
curEpisode->getEpisodeId());
|
||||||
|
|
||||||
|
// A release fence simply waits for all previous stores to complete. All
|
||||||
|
// previous loads and stores were done before this release operation is
|
||||||
|
// issued, so issueReleaseOp is just a no-op in this tester.
|
||||||
|
|
||||||
|
// we may be able to issue an action. Let's check
|
||||||
|
if (!threadEvent.scheduled()) {
|
||||||
|
scheduleWakeup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle a memory response routed back from SeqPort::recvTimingResp.
// Dispatches on the response command: fence completions, load/store/atomic
// responses, and write-completion ACKs each update their own pending counter
// and outstanding-request table, and load/atomic data is validated against
// the address manager's log table.
void
GpuWavefront::hitCallback(PacketPtr pkt)
{
    assert(pkt);
    MemCmd resp_cmd = pkt->cmd;
    // WriteCompleteResp packets carry no address; use 0 as a placeholder
    Addr addr = (resp_cmd == MemCmd::WriteCompleteResp) ? 0 : pkt->getAddr();

    DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - "
            "Addr %s\n", this->getName(),
            curEpisode->getEpisodeId(), resp_cmd.toString(),
            printAddress(addr));

    // whether the transaction is done after this hitCallback
    bool isTransactionDone = true;

    if (resp_cmd == MemCmd::MemSyncResp) {
        // response to a pending fence
        // no validation needed for fence responses
        assert(pendingFenceCount > 0);
        assert(pendingLdStCount == 0);
        assert(pendingAtomicCount == 0);
        pendingFenceCount--;
    } else if (resp_cmd == MemCmd::ReadResp) {
        // response to a pending read
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);
        assert(outstandingLoads.count(addr) > 0);

        // get return data
        Value value = *(pkt->getPtr<Value>());
        OutstandingReq req = popOutstandingReq(outstandingLoads, addr);
        // check the loaded value against the address manager's log table
        validateLoadResp(req.origLoc, req.lane, value);

        // this Read is done
        pendingLdStCount--;
    } else if (resp_cmd == MemCmd::WriteResp) {
        // response to a pending write
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);

        // no need to validate Write response
        // just pop it from the outstanding req table so that subsequent
        // requests dependent on this write can proceed
        // note that we don't decrement pendingLdStCount here yet since
        // the write is not yet completed in downstream memory. Instead, we
        // decrement the counter when we receive the write completion ack
        assert(outstandingStores.count(addr) > 0);
        OutstandingReq req = popOutstandingReq(outstandingStores, addr);
        assert(req.storedValue != AddressManager::INVALID_VALUE);

        // update log table
        addrManager->updateLogTable(req.origLoc, threadId,
                                    curEpisode->getEpisodeId(),
                                    req.storedValue,
                                    curTick(),
                                    cuId);

        // the transaction is not done yet. Waiting for write completion ack
        // NOTE(review): senderState is intentionally kept alive here (only
        // freed when isTransactionDone) — presumably reused by the matching
        // WriteCompleteResp; confirm in the coalescer that it is not leaked.
        isTransactionDone = false;
    } else if (resp_cmd == MemCmd::SwapResp) {
        // response to a pending atomic
        assert(pendingAtomicCount > 0);
        assert(pendingLdStCount == 0);
        assert(outstandingAtomics.count(addr) > 0);

        // get return data
        Value value = *(pkt->getPtr<Value>());

        // validate atomic op return
        OutstandingReq req = popOutstandingReq(outstandingAtomics, addr);
        validateAtomicResp(req.origLoc, req.lane, value);

        // update log table
        addrManager->updateLogTable(req.origLoc, threadId,
                                    curEpisode->getEpisodeId(), value,
                                    curTick(),
                                    cuId);

        // this Atomic is done
        pendingAtomicCount--;
    } else if (resp_cmd == MemCmd::WriteCompleteResp) {
        // write completion ACK: the store issued earlier is now globally done
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);

        // the Write is now done
        pendingLdStCount--;
    } else {
        panic("Unsupported MemCmd response type");
    }

    if (isTransactionDone) {
        // no need to keep senderState and request around
        delete pkt->senderState;
    }

    delete pkt;

    // record the last active cycle to check for deadlock
    lastActiveCycle = curCycle();

    // we may be able to issue an action. Let's check
    if (!threadEvent.scheduled()) {
        scheduleWakeup();
    }
}
|
||||||
|
|
||||||
|
// Base-class hook for protocol-specific request flags. Child classes of
// GpuWavefront override this to tag requests before they are issued; the
// default implementation deliberately does nothing.
void
GpuWavefront::setExtraRequestFlags(RequestPtr req)
{
    // No extra request flag is set
}
|
||||||
68
src/cpu/testers/gpu_ruby_test/gpu_wavefront.hh
Normal file
68
src/cpu/testers/gpu_ruby_test/gpu_wavefront.hh
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_
|
||||||
|
#define CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_
|
||||||
|
|
||||||
|
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
|
||||||
|
#include "params/GpuWavefront.hh"
|
||||||
|
#include "sim/clocked_object.hh"
|
||||||
|
|
||||||
|
/**
 * A GPU wavefront thread in the Ruby protocol tester. Issues per-lane
 * loads, stores and atomics plus acquire/release fences through the vector,
 * SQC and scalar ports it is attached to, and validates the responses.
 */
class GpuWavefront : public GpuThread
{
  public:
    typedef GpuWavefrontParams Params;
    GpuWavefront(const Params &p);
    virtual ~GpuWavefront();

    // shorthands for the address manager's location/value types
    typedef AddressManager::Location Location;
    typedef AddressManager::Value Value;

    // called by ProtocolTester::SeqPort when a memory response arrives
    virtual void hitCallback(PacketPtr pkt);

  protected:
    void issueLoadOps();
    void issueStoreOps();
    void issueAtomicOps();
    // acquire and release ops are protocol-specific, so their issue functions
    // may be redefined by a child class of GpuWavefront
    virtual void issueAcquireOp();
    virtual void issueReleaseOp();
    // set extra request flags that is specific to a target protocol
    // (no-op in this base class)
    virtual void setExtraRequestFlags(RequestPtr req);

  protected:
    int cuId; // compute unit associated with this wavefront
};
|
||||||
|
|
||||||
|
#endif /* CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_ */
|
||||||
312
src/cpu/testers/gpu_ruby_test/protocol_tester.cc
Normal file
312
src/cpu/testers/gpu_ruby_test/protocol_tester.cc
Normal file
@@ -0,0 +1,312 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <ctime>
|
||||||
|
#include <fstream>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
|
#include "cpu/testers/gpu_ruby_test/cpu_thread.hh"
|
||||||
|
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
|
||||||
|
#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
|
||||||
|
#include "debug/ProtocolTest.hh"
|
||||||
|
#include "mem/request.hh"
|
||||||
|
#include "sim/sim_exit.hh"
|
||||||
|
#include "sim/system.hh"
|
||||||
|
|
||||||
|
// Construct the tester: cache all python-side parameters, create one request
// port per connected CPU/CU cache interface, create the shared address
// manager, seed the RNG, and open the log file with a dump of the test
// configuration.
ProtocolTester::ProtocolTester(const Params &p)
      : ClockedObject(p),
        _requestorId(p.system->getRequestorId(this)),
        numCpuPorts(p.port_cpu_ports_connection_count),
        numVectorPorts(p.port_cu_vector_ports_connection_count),
        numSqcPorts(p.port_cu_sqc_ports_connection_count),
        numScalarPorts(p.port_cu_scalar_ports_connection_count),
        numCusPerSqc(p.cus_per_sqc),
        numCusPerScalar(p.cus_per_scalar),
        numWfsPerCu(p.wavefronts_per_cu),
        numWisPerWf(p.workitems_per_wavefront),
        numAtomicLocs(p.num_atomic_locations),
        numNormalLocsPerAtomic(p.num_normal_locs_per_atomic),
        episodeLength(p.episode_length),
        maxNumEpisodes(p.max_num_episodes),
        debugTester(p.debug_tester),
        cpuThreads(p.cpu_threads),
        wfs(p.wavefronts)
{
    int idx = 0; // global port index, unique across all four port kinds

    numCpus = numCpuPorts; // 1 cpu port per CPU
    numCus = numVectorPorts; // 1 vector port per CU

    // create all physical cpu's data ports
    for (int i = 0; i < numCpuPorts; ++i) {
        DPRINTF(ProtocolTest, "Creating %s\n",
                csprintf("%s-cpuPort%d", name(), i));
        cpuPorts.push_back(new SeqPort(csprintf("%s-cpuPort%d", name(), i),
                                       this, i, idx));
        idx++;
    }

    // create all physical gpu's data ports
    for (int i = 0; i < numVectorPorts; ++i) {
        DPRINTF(ProtocolTest, "Creating %s\n",
                csprintf("%s-cuVectorPort%d", name(), i));
        cuVectorPorts.push_back(new SeqPort(csprintf("%s-cuVectorPort%d",
                                                     name(), i),
                                            this, i, idx));
        idx++;
    }

    // one scalar port per group of numCusPerScalar CUs
    for (int i = 0; i < numScalarPorts; ++i) {
        DPRINTF(ProtocolTest, "Creating %s\n",
                csprintf("%s-cuScalarPort%d", name(), i));
        cuScalarPorts.push_back(new SeqPort(csprintf("%s-cuScalarPort%d",
                                                     name(), i),
                                            this, i, idx));
        idx++;
    }

    // one SQC (instruction cache) port per group of numCusPerSqc CUs
    for (int i = 0; i < numSqcPorts; ++i) {
        DPRINTF(ProtocolTest, "Creating %s\n",
                csprintf("%s-cuSqcPort%d", name(), i));
        cuSqcPorts.push_back(new SeqPort(csprintf("%s-cuSqcPort%d",
                                                  name(), i),
                                         this, i, idx));
        idx++;
    }

    // create an address manager: generates DRF request streams and keeps
    // the log table used to validate responses
    addrManager = new AddressManager(numAtomicLocs,
                                     numNormalLocsPerAtomic);
    nextEpisodeId = 0;

    if (!debugTester)
        warn("Data race check is not enabled\n");

    sentExitSignal = false;

    // set random seed number; 0 means "seed from wall-clock time"
    if (p.random_seed != 0) {
        srand(p.random_seed);
    } else {
        srand(time(NULL));
    }

    actionCount = 0;

    // create a new log file
    logFile = simout.create(p.log_file);
    assert(logFile);

    // print test configs
    std::stringstream ss;
    ss << "GPU Ruby test's configurations" << std::endl
       << "\tNumber of CPUs: " << numCpus << std::endl
       << "\tNumber of CUs: " << numCus << std::endl
       << "\tNumber of wavefronts per CU: " << numWfsPerCu << std::endl
       << "\tWavefront size: " << numWisPerWf << std::endl
       << "\tNumber of atomic locations: " << numAtomicLocs << std::endl
       << "\tNumber of non-atomic locations: "
       << numNormalLocsPerAtomic * numAtomicLocs << std::endl
       << "\tEpisode length: " << episodeLength << std::endl
       << "\tTest length (max number of episodes): " << maxNumEpisodes
       << std::endl
       << "\tRandom seed: " << p.random_seed
       << std::endl;

    ccprintf(*(logFile->stream()), "%s", ss.str());
    logFile->stream()->flush();
}
|
||||||
|
|
||||||
|
// Tear down the tester: release the ports and address manager it owns and
// close its log file.
ProtocolTester::~ProtocolTester()
{
    // the tester owns every port it created in the constructor
    for (auto *port : cpuPorts)
        delete port;
    for (auto *port : cuVectorPorts)
        delete port;
    for (auto *port : cuScalarPorts)
        delete port;
    for (auto *port : cuSqcPorts)
        delete port;
    delete addrManager;

    // close the log file
    simout.close(logFile);
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ProtocolTester::init()
|
||||||
|
{
|
||||||
|
DPRINTF(ProtocolTest, "Attach threads to ports\n");
|
||||||
|
|
||||||
|
// connect cpu threads to cpu's ports
|
||||||
|
for (int cpu_id = 0; cpu_id < numCpus; ++cpu_id) {
|
||||||
|
cpuThreads[cpu_id]->attachGpuThreadToPorts(this,
|
||||||
|
static_cast<SeqPort*>(cpuPorts[cpu_id]));
|
||||||
|
cpuThreads[cpu_id]->scheduleWakeup();
|
||||||
|
cpuThreads[cpu_id]->scheduleDeadlockCheckEvent();
|
||||||
|
}
|
||||||
|
|
||||||
|
// connect gpu wavefronts to gpu's ports
|
||||||
|
int wfId = 0;
|
||||||
|
int vectorPortId = 0;
|
||||||
|
int sqcPortId = 0;
|
||||||
|
int scalarPortId = 0;
|
||||||
|
|
||||||
|
for (int cu_id = 0; cu_id < numCus; ++cu_id) {
|
||||||
|
vectorPortId = cu_id;
|
||||||
|
sqcPortId = cu_id/numCusPerSqc;
|
||||||
|
scalarPortId = cu_id/numCusPerScalar;
|
||||||
|
|
||||||
|
for (int i = 0; i < numWfsPerCu; ++i) {
|
||||||
|
wfId = cu_id * numWfsPerCu + i;
|
||||||
|
wfs[wfId]->attachGpuThreadToPorts(this,
|
||||||
|
static_cast<SeqPort*>(cuVectorPorts[vectorPortId]),
|
||||||
|
static_cast<SeqPort*>(cuSqcPorts[sqcPortId]),
|
||||||
|
static_cast<SeqPort*>(cuScalarPorts[scalarPortId]));
|
||||||
|
wfs[wfId]->scheduleWakeup();
|
||||||
|
wfs[wfId]->scheduleDeadlockCheckEvent();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Return the requestor port with the given interface name and index.
 *
 * @param if_name one of "cpu_ports", "cu_vector_ports", "cu_sqc_ports" or
 *                "cu_scalar_ports"; any other name is forwarded to
 *                ClockedObject::getPort.
 * @param idx     index into the corresponding port vector.
 */
Port&
ProtocolTester::getPort(const std::string &if_name, PortID idx)
{
    if (if_name != "cpu_ports" && if_name != "cu_vector_ports" &&
        if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports") {
        // pass along to super class
        return ClockedObject::getPort(if_name, idx);
    } else {
        if (if_name == "cpu_ports") {
            // valid indices are [0, numCpuPorts): the previous '>' check
            // let idx == numCpuPorts through and indexed past the end
            if (idx >= numCpuPorts)
                panic("ProtocolTester: unknown cpu port %d\n", idx);
            return *cpuPorts[idx];
        } else if (if_name == "cu_vector_ports") {
            if (idx >= numVectorPorts)
                panic("ProtocolTester: unknown cu vect port %d\n", idx);
            return *cuVectorPorts[idx];
        } else if (if_name == "cu_sqc_ports") {
            if (idx >= numSqcPorts)
                panic("ProtocolTester: unknown cu sqc port %d\n", idx);
            return *cuSqcPorts[idx];
        } else {
            assert(if_name == "cu_scalar_ports");
            if (idx >= numScalarPorts)
                panic("ProtocolTester: unknown cu scal port %d\n", idx);
            return *cuScalarPorts[idx];
        }
    }

    // unreachable: every branch above returns or panics
    assert(false);
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
ProtocolTester::checkExit()
|
||||||
|
{
|
||||||
|
if (nextEpisodeId > maxNumEpisodes) {
|
||||||
|
if (!sentExitSignal) {
|
||||||
|
// all done
|
||||||
|
inform("Total completed episodes: %d\n", nextEpisodeId - 1);
|
||||||
|
exitSimLoop("GPU Ruby Tester: Passed!");
|
||||||
|
sentExitSignal = true;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
ProtocolTester::checkDRF(Location atomic_loc,
|
||||||
|
Location loc, bool isStore) const
|
||||||
|
{
|
||||||
|
if (debugTester) {
|
||||||
|
// go through all active episodes in all threads
|
||||||
|
for (const GpuThread* th : wfs) {
|
||||||
|
if (!th->checkDRF(atomic_loc, loc, isStore))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const GpuThread* th : cpuThreads) {
|
||||||
|
if (!th->checkDRF(atomic_loc, loc, isStore))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ProtocolTester::dumpErrorLog(std::stringstream& ss)
|
||||||
|
{
|
||||||
|
if (!sentExitSignal) {
|
||||||
|
// go through all threads and dump their outstanding requests
|
||||||
|
for (auto t : cpuThreads) {
|
||||||
|
t->printAllOutstandingReqs(ss);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto t : wfs) {
|
||||||
|
t->printAllOutstandingReqs(ss);
|
||||||
|
}
|
||||||
|
|
||||||
|
// dump error log into a file
|
||||||
|
assert(logFile);
|
||||||
|
ccprintf(*(logFile->stream()), "%s", ss.str());
|
||||||
|
logFile->stream()->flush();
|
||||||
|
|
||||||
|
sentExitSignal = true;
|
||||||
|
// terminate the simulation
|
||||||
|
panic("GPU Ruby Tester: Failed!\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
ProtocolTester::SeqPort::recvTimingResp(PacketPtr pkt)
|
||||||
|
{
|
||||||
|
// get the requesting thread from the original sender state
|
||||||
|
ProtocolTester::SenderState* senderState =
|
||||||
|
safe_cast<ProtocolTester::SenderState*>(pkt->senderState);
|
||||||
|
GpuThread *th = senderState->th;
|
||||||
|
|
||||||
|
th->hitCallback(pkt);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Factory hook called by gem5's python parameter system to instantiate the
// tester SimObject from its params struct.
ProtocolTester*
ProtocolTesterParams::create() const
{
    return new ProtocolTester(*this);
}
|
||||||
178
src/cpu/testers/gpu_ruby_test/protocol_tester.hh
Normal file
178
src/cpu/testers/gpu_ruby_test/protocol_tester.hh
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* For use for simulation and test purposes only
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_
|
||||||
|
#define CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The tester includes the main ProtocolTester that manages all ports to the
|
||||||
|
* memory system.
|
||||||
|
* GpuThreads are mapped to certain data port(s)
|
||||||
|
*
|
||||||
|
* GpuThreads inject memory requests through their data ports.
|
||||||
|
* The tester receives and validates responses from the memory.
|
||||||
|
*
|
||||||
|
* Main components
|
||||||
|
* - AddressManager: generate DRF request streams &
|
||||||
|
* validate data response against an internal log_table
|
||||||
|
* - Episode: a sequence of requests
|
||||||
|
* - Thread: either GPU wavefront or CPU thread
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "base/types.hh"
|
||||||
|
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
|
||||||
|
#include "mem/packet.hh"
|
||||||
|
#include "mem/ruby/system/RubyPort.hh"
|
||||||
|
#include "params/ProtocolTester.hh"
|
||||||
|
|
||||||
|
class GpuThread;
|
||||||
|
class CpuThread;
|
||||||
|
class GpuWavefront;
|
||||||
|
|
||||||
|
/**
 * Top-level GPU Ruby protocol tester. Owns all request ports into the memory
 * system, maps CPU threads and GPU wavefronts onto them, and coordinates the
 * shared AddressManager used to generate DRF request streams and validate
 * responses.
 */
class ProtocolTester : public ClockedObject
{
  public:
    // request port that forwards timing responses to the issuing thread
    class SeqPort : public RequestPort
    {
      public:
        // NOTE(review): _index is accepted but never used — confirm whether
        // it can be dropped or should be stored
        SeqPort(const std::string &_name, ProtocolTester *_tester, PortID _id,
                PortID _index)
            : RequestPort(_name, _tester, _id)
        {}

      protected:
        virtual bool recvTimingResp(PacketPtr pkt);
        // the tester never retries: it expects sends to always succeed
        virtual void recvReqRetry()
        { panic("%s does not expect a retry\n", name()); }
    };

    // attached to every packet so the response can be routed back to the
    // thread that issued the request
    struct SenderState : public Packet::SenderState
    {
        GpuThread* th;
        SenderState(GpuThread* _th)
        {
            assert(_th);
            th = _th;
        }

        ~SenderState()
        {}
    };

  public:
    typedef ProtocolTesterParams Params;
    ProtocolTester(const Params &p);
    ~ProtocolTester();

    // shorthands for the address manager's location/value types
    typedef AddressManager::Location Location;
    typedef AddressManager::Value Value;

    void init();
    RequestorID requestorId() { return _requestorId; };
    Port& getPort(const std::string &if_name,
                  PortID idx=InvalidPortID) override;

    int getEpisodeLength() const { return episodeLength; }
    // return pointer to the address manager
    AddressManager* getAddressManager() const { return addrManager; }
    // return true if the tester should stop issuing new episodes
    bool checkExit();
    // verify if a location to be picked for LD/ST will satisfy
    // data race free requirement
    bool checkDRF(Location atomic_loc, Location loc, bool isStore) const;
    // return the next episode id and increment it
    int getNextEpisodeID() { return nextEpisodeId++; }
    // get action sequence number (increments on every call)
    int getActionSeqNum() { return actionCount++; }

    // dump error log into a file and exit the simulation
    void dumpErrorLog(std::stringstream& ss);

  private:
    RequestorID _requestorId;

    // list of parameters taken from python scripts
    int numCpuPorts;
    int numVectorPorts;
    int numSqcPorts;
    int numScalarPorts;
    int numCusPerSqc;
    int numCusPerScalar;
    int numWfsPerCu;
    int numWisPerWf;
    // parameters controlling the address range that the tester can access
    int numAtomicLocs;
    int numNormalLocsPerAtomic;
    // the number of actions in an episode (episodeLength +- random number)
    int episodeLength;
    // the maximum number of episodes to be completed by this tester
    int maxNumEpisodes;
    // are we debugging the tester (enables the data race check)
    bool debugTester;

    // all available requestor ports connected to Ruby
    std::vector<RequestPort*> cpuPorts; // cpu data ports
    std::vector<RequestPort*> cuVectorPorts; // ports to GPU vector cache
    std::vector<RequestPort*> cuSqcPorts; // ports to GPU inst cache
    std::vector<RequestPort*> cuScalarPorts; // ports to GPU scalar cache
    // all CPU and GPU threads
    std::vector<CpuThread*> cpuThreads;
    std::vector<GpuWavefront*> wfs;

    // address manager that (1) generates DRF sequences of requests,
    // (2) manages an internal log table and
    // (3) validate response data
    AddressManager* addrManager;

    // number of CPUs and CUs
    int numCpus;
    int numCus;
    // unique id of the next episode
    int nextEpisodeId;

    // global action count. Overflow is fine. It's used to uniquely identify
    // per-wave & per-instruction memory requests in the coalescer
    int actionCount;

    // if an exit signal was already sent
    bool sentExitSignal;

    // output stream for the tester's configuration and error log
    OutputStream* logFile;
};
|
||||||
|
|
||||||
|
#endif /* CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_ */
|
||||||
Reference in New Issue
Block a user