tests,configs,mem-ruby: Adding Ruby tester for GPU_VIPER

This patch adds the GPU protocol tester that uses data-race-free
operation to discover bugs in GPU protocols including GPU_VIPER. For
more information please see the following paper and the README:

T. Ta, X. Zhang, A. Gutierrez and B. M. Beckmann, "Autonomous
Data-Race-Free GPU Testing," 2019 IEEE International Symposium on
Workload Characterization (IISWC), Orlando, FL, USA, 2019, pp. 81-92,
doi: 10.1109/IISWC47752.2019.9042019.

Change-Id: Ic9939d131a930d1e7014ed0290601140bdd1499f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/32855
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Matthew Poremba
2020-09-24 14:53:13 -05:00
parent 1a2b677728
commit f36817c367
19 changed files with 3498 additions and 103 deletions

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
# Copyright (c) 2018-2020 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
@@ -43,145 +43,272 @@ addToPath('../')
from common import Options
from ruby import Ruby
# Get paths we might need.
config_path = os.path.dirname(os.path.abspath(__file__))
config_root = os.path.dirname(config_path)
m5_root = os.path.dirname(config_root)
parser = optparse.OptionParser()
Options.addNoISAOptions(parser)
parser.add_option("--maxloads", metavar="N", default=100,
help="Stop after N loads")
parser.add_option("-f", "--wakeup_freq", metavar="N", default=10,
help="Wakeup every N cycles")
parser.add_option("-u", "--num-compute-units", type="int", default=1,
help="number of compute units in the GPU")
parser.add_option("--num-cp", type="int", default=0,
help="Number of GPU Command Processors (CP)")
# not super important now, but to avoid putting the number 4 everywhere, make
# it an option/knob
parser.add_option("--cu-per-sqc", type="int", default=4, help="number of CUs \
sharing an SQC (icache, and thus icache TLB)")
parser.add_option("--simds-per-cu", type="int", default=4, help="SIMD units" \
"per CU")
parser.add_option("--wf-size", type="int", default=64,
help="Wavefront size(in workitems)")
parser.add_option("--wfs-per-simd", type="int", default=10, help="Number of " \
"WF slots per SIMD")
#
# Add the ruby specific and protocol specific options
#
parser = optparse.OptionParser()
Options.addNoISAOptions(parser)
Ruby.define_options(parser)
exec(compile( \
open(os.path.join(config_root, "common", "Options.py")).read(), \
os.path.join(config_root, "common", "Options.py"), 'exec'))
# GPU Ruby tester options
parser.add_option("--cache-size", type="choice", default="small",
choices=["small", "large"],
help="Cache sizes to use. Small encourages races between \
requests and writebacks. Large stresses write-through \
and/or write-back GPU caches.")
parser.add_option("--system-size", type="choice", default="small",
choices=["small", "medium", "large"],
help="This option defines how many CUs, CPUs and cache \
components in the test system.")
parser.add_option("--address-range", type="choice", default="small",
choices=["small", "large"],
help="This option defines the number of atomic \
locations that affects the working set's size. \
A small number of atomic locations encourage more \
races among threads. The large option stresses cache \
resources.")
parser.add_option("--episode-length", type="choice", default="short",
choices=["short", "medium", "long"],
help="This option defines the number of LDs and \
STs in an episode. The small option encourages races \
between the start and end of an episode. The long \
option encourages races between LDs and STs in the \
same episode.")
parser.add_option("--test-length", type="int", default=1,
help="The number of episodes to be executed by each \
wavefront. This determines the maximum number, i.e., \
val X #WFs, of episodes to be executed in the test.")
parser.add_option("--debug-tester", action='store_true',
help="This option will turn on DRF checker")
parser.add_option("--random-seed", type="int", default=0,
help="Random seed number. Default value (i.e., 0) means \
using runtime-specific value")
parser.add_option("--log-file", type="string", default="gpu-ruby-test.log")
(options, args) = parser.parse_args()
#
# Set the default cache size and associativity to be very small to encourage
# races between requests and writebacks.
#
options.l1d_size="256B"
options.l1i_size="256B"
options.l2_size="512B"
options.l3_size="1kB"
options.l1d_assoc=2
options.l1i_assoc=2
options.l2_assoc=2
options.l3_assoc=2
# This file can support multiple compute units
assert(options.num_compute_units >= 1)
n_cu = options.num_compute_units
options.num_sqc = int((n_cu + options.cu_per_sqc - 1) // options.cu_per_sqc)
if args:
print("Error: script doesn't take any positional arguments")
sys.exit(1)
#
# Create the ruby random tester
# Set up cache size - 2 options
# 0: small cache
# 1: large cache
#
# Check to for the GPU_RfO protocol. Other GPU protocols are non-SC and will
# not work with the Ruby random tester.
assert(buildEnv['PROTOCOL'] == 'GPU_RfO')
# The GPU_RfO protocol does not support cache flushes
check_flush = False
tester = RubyTester(check_flush=check_flush,
checks_to_complete=options.maxloads,
wakeup_frequency=options.wakeup_freq,
deadlock_threshold=1000000)
if (options.cache_size == "small"):
options.tcp_size="256B"
options.tcp_assoc=2
options.tcc_size="1kB"
options.tcc_assoc=2
elif (options.cache_size == "large"):
options.tcp_size="256kB"
options.tcp_assoc=16
options.tcc_size="1024kB"
options.tcc_assoc=16
#
# Create the M5 system. Note that the Memory Object isn't
# actually used by the rubytester, but is included to support the
# M5 memory size == Ruby memory size checks
# Set up system size - 3 options
#
system = System(cpu=tester, mem_ranges=[AddrRange(options.mem_size)])
if (options.system_size == "small"):
# 1 CU, 1 CPU, 1 SQC, 1 Scalar
options.wf_size = 1
options.wavefronts_per_cu = 1
options.num_cpus = 1
options.cu_per_sqc = 1
options.cu_per_scalar_cache = 1
options.num_compute_units = 1
elif (options.system_size == "medium"):
# 4 CUs, 4 CPUs, 1 SQCs, 1 Scalars
options.wf_size = 16
options.wavefronts_per_cu = 4
options.num_cpus = 4
options.cu_per_sqc = 4
options.cu_per_scalar_cache = 4
options.num_compute_units = 4
elif (options.system_size == "large"):
# 8 CUs, 4 CPUs, 2 SQCs, 2 Scalars
options.wf_size = 32
options.wavefronts_per_cu = 4
options.num_cpus = 4
options.cu_per_sqc = 4
options.cu_per_scalar_cache = 4
options.num_compute_units = 8
# Create a top-level voltage domain and clock domain
system.voltage_domain = VoltageDomain(voltage=options.sys_voltage)
#
# Set address range - 2 options
# level 0: small
# level 1: large
# Each location corresponds to a 4-byte piece of data
#
options.mem_size = '1024MB'
if (options.address_range == "small"):
num_atomic_locs = 10
num_regular_locs_per_atomic_loc = 10000
elif (options.address_range == "large"):
num_atomic_locs = 100
num_regular_locs_per_atomic_loc = 100000
system.clk_domain = SrcClockDomain(clock=options.sys_clock,
voltage_domain=system.voltage_domain)
#
# Set episode length (# of actions per episode) - 3 options
# 0: 10 actions
# 1: 100 actions
# 2: 500 actions
#
if (options.episode_length == "short"):
eps_length = 10
elif (options.episode_length == "medium"):
eps_length = 100
elif (options.episode_length == "long"):
eps_length = 500
#
# Set Ruby and tester deadlock thresholds. Ruby's deadlock detection is the
# primary check for deadlocks. The tester's deadlock threshold detection is
# a secondary check for deadlock. If there is a bug in RubyPort that causes
# a packet not to return to the tester properly, the tester will issue a
# deadlock panic. We set cache_deadlock_threshold < tester_deadlock_threshold
# to detect deadlock caused by Ruby protocol first before one caused by the
# coalescer. Both units are in Ticks
#
options.cache_deadlock_threshold = 1e8
tester_deadlock_threshold = 1e9
# For now we're testing only GPU protocol, so we force num_cpus to be 0
options.num_cpus = 0
# Number of CUs
n_CUs = options.num_compute_units
# Set test length, i.e., number of episodes per wavefront * #WFs.
# Test length can be 1x#WFs, 10x#WFs, 100x#WFs, ...
n_WFs = n_CUs * options.wavefronts_per_cu
max_episodes = options.test_length * n_WFs
# Number of SQC and Scalar caches
assert(n_CUs % options.cu_per_sqc == 0)
n_SQCs = n_CUs // options.cu_per_sqc
options.num_sqc = n_SQCs
assert(options.cu_per_scalar_cache != 0)
n_Scalars = n_CUs // options.cu_per_scalar_cache
options.num_scalar_cache = n_Scalars
#
# Create GPU Ruby random tester
#
tester = ProtocolTester(cus_per_sqc = options.cu_per_sqc,
cus_per_scalar = options.cu_per_scalar_cache,
wavefronts_per_cu = options.wavefronts_per_cu,
workitems_per_wavefront = options.wf_size,
num_atomic_locations = num_atomic_locs,
num_normal_locs_per_atomic = \
num_regular_locs_per_atomic_loc,
max_num_episodes = max_episodes,
episode_length = eps_length,
debug_tester = options.debug_tester,
random_seed = options.random_seed,
log_file = options.log_file)
#
# Create a gem5 system. Note that the memory object isn't actually used by the
# tester, but is included to ensure the gem5 memory size == Ruby memory size
# checks. The system doesn't have real CPUs or CUs. It just has a tester that
# has physical ports to be connected to Ruby
#
system = System(cpu = tester,
mem_ranges = [AddrRange(options.mem_size)],
cache_line_size = options.cacheline_size,
mem_mode = 'timing')
system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
system.clk_domain = SrcClockDomain(clock = options.sys_clock,
voltage_domain = system.voltage_domain)
#
# Command processor is not needed for the tester since we don't run real
# kernels. Setting it to zero disables the VIPER protocol from creating
# a command processor and its caches.
#
options.num_cp = 0
#
# Create the Ruby system
#
Ruby.create_system(options, False, system)
# Create a separate clock domain for Ruby
system.ruby.clk_domain = SrcClockDomain(clock=options.ruby_clock,
voltage_domain=system.voltage_domain)
tester.num_cpus = len(system.ruby._cpu_ports)
#
# The tester is most effective when randomization is turned on and
# artificial delay is randomly inserted on messages
#
system.ruby.randomization = True
for ruby_port in system.ruby._cpu_ports:
# Assert that we got the right number of Ruby ports
assert(len(system.ruby._cpu_ports) == n_CUs + n_SQCs + n_Scalars)
#
# Tie the ruby tester ports to the ruby cpu read and write ports
#
if ruby_port.support_data_reqs and ruby_port.support_inst_reqs:
tester.cpuInstDataPort = ruby_port.slave
elif ruby_port.support_data_reqs:
tester.cpuDataPort = ruby_port.slave
elif ruby_port.support_inst_reqs:
tester.cpuInstPort = ruby_port.slave
# Do not automatically retry stalled Ruby requests
#
# Attach Ruby ports to the tester in the order:
# cpu_sequencers,
# vector_coalescers,
# sqc_sequencers,
# scalar_sequencers
#
# Note that this requires the protocol to create sequencers in this order
#
print("Attaching ruby ports to the tester")
for i, ruby_port in enumerate(system.ruby._cpu_ports):
ruby_port.no_retry_on_stall = True
#
# Tell each sequencer this is the ruby tester so that it
# copies the subblock back to the checker
#
ruby_port.using_ruby_tester = True
# -----------------------
# run simulation
# -----------------------
if i < n_CUs:
tester.cu_vector_ports = ruby_port.in_ports
tester.cu_token_ports = ruby_port.gmTokenPort
tester.max_cu_tokens = 4*n_WFs
elif i < (n_CUs + n_SQCs):
tester.cu_sqc_ports = ruby_port.in_ports
else:
tester.cu_scalar_ports = ruby_port.in_ports
root = Root( full_system = False, system = system )
root.system.mem_mode = 'timing'
i += 1
#
# No CPU threads are needed for GPU tester
#
tester.cpu_threads = []
#
# Create GPU wavefronts
#
thread_clock = SrcClockDomain(clock = '1GHz',
voltage_domain = system.voltage_domain)
wavefronts = []
g_thread_idx = 0
print("Creating %i WFs attached to %i CUs" % \
(n_CUs * tester.wavefronts_per_cu, n_CUs))
for cu_idx in range(n_CUs):
for wf_idx in range(tester.wavefronts_per_cu):
wavefronts.append(GpuWavefront(thread_id = g_thread_idx,
cu_id = cu_idx,
num_lanes = options.wf_size,
clk_domain = thread_clock,
deadlock_threshold = \
tester_deadlock_threshold))
g_thread_idx += 1
tester.wavefronts = wavefronts
#
# Run simulation
#
root = Root(full_system = False, system = system)
# Not much point in this being higher than the L1 latency
m5.ticks.setGlobalFrequency('1ns')
# instantiate configuration
# Instantiate configuration
m5.instantiate()
# simulate until program terminates
exit_event = m5.simulate(options.abs_max_tick)
# Simulate until tester completes
exit_event = m5.simulate()
print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())
print('Exiting tick: ', m5.curTick())
print('Exiting because ', exit_event.getCause())

View File

@@ -0,0 +1,39 @@
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from m5.params import *
from m5.proxy import *
from m5.objects.GpuThread import GpuThread
class CpuThread(GpuThread):
    # Tester thread modeling a scalar CPU thread (a single lane).
    # Per the tester README, the C++ side is not fully implemented yet.
    type = 'CpuThread'
    cxx_header = "cpu/testers/gpu_ruby_test/cpu_thread.hh"

View File

@@ -0,0 +1,42 @@
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from m5.objects.ClockedObject import ClockedObject
from m5.params import *
from m5.proxy import *
class GpuThread(ClockedObject):
    """Abstract base class for tester threads.

    Concrete subclasses are CpuThread (scalar, one lane) and GpuWavefront
    (multiple lanes). Each thread executes a series of data-race-free
    episodes generated by the ProtocolTester.
    """
    type = 'GpuThread'
    abstract = True
    cxx_header = "cpu/testers/gpu_ruby_test/gpu_thread.hh"

    # Globally unique ID identifying this thread within the tester.
    thread_id = Param.Int("Unique GpuThread ID")
    # Number of SIMD lanes executing this thread's action stream.
    num_lanes = Param.Int("Number of lanes this thread has")
    # Cycles without forward progress before the thread panics; a secondary
    # deadlock check behind Ruby's own detection.
    deadlock_threshold = Param.Cycles(1000000000, "Deadlock threshold")

View File

@@ -0,0 +1,40 @@
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from m5.params import *
from m5.proxy import *
from m5.objects.GpuThread import GpuThread
class GpuWavefront(GpuThread):
    # Tester thread modeling a GPU wavefront; all lanes execute the same
    # sequence of actions, possibly targeting different addresses.
    type = 'GpuWavefront'
    cxx_header = "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"

    # ID of the compute unit this wavefront belongs to.
    cu_id = Param.Int("Compute Unit ID")

View File

@@ -0,0 +1,64 @@
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from m5.objects.ClockedObject import ClockedObject
from m5.params import *
from m5.proxy import *
class ProtocolTester(ClockedObject):
    """Top-level GPU protocol tester.

    Orchestrates CPU threads and GPU wavefronts that issue data-race-free
    episodes (atomics, loads, stores, acquires, releases) against the Ruby
    memory system to stress release-consistency protocols such as GPU_VIPER.
    """
    type = 'ProtocolTester'
    cxx_header = "cpu/testers/gpu_ruby_test/protocol_tester.hh"

    # Request ports connected to Ruby sequencers. The config script attaches
    # them in the order: cpu, vector (CU), sqc, scalar.
    cpu_ports = VectorRequestPort("Ports for CPUs")
    cu_vector_ports = VectorRequestPort("Vector ports for GPUs")
    cu_sqc_ports = VectorRequestPort("SQC ports for GPUs")
    cu_scalar_ports = VectorRequestPort("Scalar ports for GPUs")

    # System topology knobs: cache sharing ratios and thread counts.
    cus_per_sqc = Param.Int(4, "Number of CUs per SQC")
    cus_per_scalar = Param.Int(4, "Number of CUs per scalar cache")
    wavefronts_per_cu = Param.Int(1, "Number of wavefronts per CU")
    workitems_per_wavefront = Param.Int(64, "Number of workitems per wf")

    # The threads/wavefronts driven by this tester.
    cpu_threads = VectorParam.CpuThread("All cpus")
    wavefronts = VectorParam.GpuWavefront("All wavefronts")

    # Working-set shape: atomic locations plus normal (non-atomic) locations
    # grouped per atomic location. Fewer locations encourage more races.
    num_atomic_locations = Param.Int(2, "Number of atomic locations")
    num_normal_locs_per_atomic = Param.Int(1000, \
        "Number of normal locations per atomic")

    # Episode structure and overall test length.
    episode_length = Param.Int(10, "Number of actions per episode")
    max_num_episodes = Param.Int(20, "Maximum number of episodes")

    # Enables the data-race-free (DRF) checker.
    debug_tester = Param.Bool(False, "Are we debugging the tester?")
    random_seed = Param.Int(0, "Random seed number. Default value (0) means \
        using runtime-specific value.")
    log_file = Param.String("Log file's name")
    system = Param.System(Parent.any, "System we belong to")

View File

@@ -0,0 +1,129 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
This directory contains a tester for gem5 GPU protocols. Unlike the Ruby random
tester, this tester does not rely on sequential consistency. Instead, it
assumes tested protocols support release consistency.
----- Getting Started -----
To start using the tester quickly, you can use the following example command
line to get running immediately:
build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py \
--test-length=1000 --system-size=medium --cache-size=small
An overview of the main command line options is as follows. For all options
use `build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py --help`
or see the configuration file.
* --cache-size (small, large): Use smaller sizes for testing evict, etc.
* --system-size (small, medium, large): Effectively the number of threads in
the GPU model. Large size will have more contention. Larger
sizes are useful for checking contention.
* --episode-length (short, medium, long): Number of loads and stores in an
episode. Episodes will also have atomics mixed in. See below
for a definition of episode.
* --test-length (int): Number of episodes to execute. This will determine the
amount of time the tester runs for. Longer time will stress
the protocol harder.
The remainder of this file describes the theory behind the tester design and
a link to a more detailed research paper is provided at the end.
----- Theory Overview -----
The GPU Ruby tester creates a system consisting of both CPU threads and GPU
wavefronts. CPU threads are scalar, so there is one lane per CPU thread. GPU
wavefront may have multiple lanes. The number of lanes is initialized when
a thread/wavefront is created.
Each thread/wavefront executes a number of episodes. Each episode is a series
of memory actions (i.e., atomic, load, store, acquire and release). In a
wavefront, all lanes execute the same sequence of actions, but they may target
different addresses. One can think of an episode as a critical section which
is bounded by a lock acquire in the beginning and a lock release at the end. An
episode consists of actions in the following order:
1 - Atomic action
2 - Acquire action
3 - A number of load and store actions
4 - Release action
5 - Atomic action that targets the same address as (1) does
There are two separate set of addresses: atomic and non-atomic. Atomic actions
target only atomic addresses. Load and store actions target only non-atomic
addresses. Memory addresses are all 4-byte aligned in the tester.
To test false sharing cases in which both atomic and non-atomic addresses are
placed in the same cache line, we abstract out the concept of memory addresses
from the tester's perspective by introducing the concept of location. Locations
are numbered from 0 to N-1 (if there are N addresses). The first X locations
[0..X-1] are atomic locations, and the rest are non-atomic locations.
The 1-1 mapping between locations and addresses is randomly created when the
tester is initialized.
Per load and store action, its target location is selected so that there is no
data race in the generated stream of memory requests at any time during the
test. Since in Data-Race-Free model, the memory system's behavior is undefined
in data race cases, we exclude data race scenarios from our protocol test.
Once the location of a load/store action is determined, each thread/wavefront either
loads current value at the location or stores an incremental value to that
location. The tester maintains a table tracking all last writers and their
written values, so we know what value should be returned from a load and what
value should be written next at a particular location. Value returned from a
load must match with the value written by the last writer.
----- Directory Structure -----
ProtocolTester.hh/cc -- This is the main tester class that orchestrates the
entire test.
AddressManager.hh/cc -- This manages address space, randomly maps address to
location, generates locations for all episodes,
maintains per-location last writer and validates
values returned from load actions.
GpuThread.hh/cc -- This is abstract class for CPU threads and GPU
wavefronts. It generates and executes a series of
episodes.
CpuThread.hh/cc -- Thread class for CPU threads. Not fully implemented yet
GpuWavefront.hh/cc -- GpuThread class for GPU wavefronts.
Episode.hh/cc -- Class to encapsulate an episode, notably including
episode load/store structure and ordering.
For more detail, please see the following paper:
T. Ta, X. Zhang, A. Gutierrez and B. M. Beckmann, "Autonomous Data-Race-Free
GPU Testing," 2019 IEEE International Symposium on Workload Characterization
(IISWC), Orlando, FL, USA, 2019, pp. 81-92, doi:
10.1109/IISWC47752.2019.9042019.

View File

@@ -0,0 +1,54 @@
#
# Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
Import('*')

# The GPU Ruby tester requires a GPU build with a Ruby protocol; skip the
# whole directory otherwise.
if not env['BUILD_GPU']:
    Return()

if env['PROTOCOL'] == 'None':
    Return()

# SimObject wrappers for the tester and its thread types.
SimObject('ProtocolTester.py')
SimObject('GpuThread.py')
SimObject('CpuThread.py')
SimObject('GpuWavefront.py')

# C++ implementation of the tester.
Source('address_manager.cc')
Source('episode.cc')
Source('protocol_tester.cc')
Source('gpu_thread.cc')
Source('cpu_thread.cc')
Source('gpu_wavefront.cc')

DebugFlag('ProtocolTest')

View File

@@ -0,0 +1,431 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "cpu/testers/gpu_ruby_test/address_manager.hh"

#include <algorithm>
#include <random>

#include "base/intmath.hh"
#include "base/logging.hh"
#include "base/random.hh"
#include "base/trace.hh"
// Sentinel values: "no value recorded" / "no such location".
const int AddressManager::INVALID_VALUE = -1;
const int AddressManager::INVALID_LOCATION = -1;
/**
 * Build the tester's address space: numAtomicLocs atomic locations followed
 * by numAtomicLocs * n_normal_locs_per_atomic normal locations, each mapped
 * to a unique, randomly shuffled, sizeof(Value)-aligned address. One
 * AtomicStruct per atomic location manages its normal-location range, and
 * one LastWriter log entry is created per location.
 */
AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic)
      : numAtomicLocs(n_atomic_locs),
        numLocsPerAtomic(n_normal_locs_per_atomic)
{
    assert(numAtomicLocs > 0 && numLocsPerAtomic > 0);
    numNormalLocs = numAtomicLocs * numLocsPerAtomic;

    // generate random address map
    randAddressMap.resize(numAtomicLocs + numNormalLocs);
    for (Location i = 0; i < numAtomicLocs + numNormalLocs; ++i) {
        // all addresses are sizeof(Value) (i.e., 4-byte) aligned
        randAddressMap[i] = (Addr)((i + 128) << floorLog2(sizeof(Value)));
    }

    // randomly shuffle randAddressMap. std::random_shuffle was deprecated
    // in C++14 and removed in C++17, so use std::shuffle with an explicit
    // generator seeded from the tester's random() stream to stay
    // reproducible for a given seed.
    std::shuffle(randAddressMap.begin(), randAddressMap.end(),
                 std::mt19937(random()));

    // initialize atomic locations
    // first and last normal location per atomic location
    Location first, last;
    for (Location atomic_loc = 0; atomic_loc < numAtomicLocs; ++atomic_loc) {
        first = numAtomicLocs + numLocsPerAtomic * atomic_loc;
        last = first + numLocsPerAtomic - 1;
        atomicStructs.push_back(new AtomicStruct(atomic_loc, first, last));
    }

    // initialize log table
    for (Location loc = 0; loc < numAtomicLocs + numNormalLocs; ++loc) {
        logTable.push_back(new LastWriter());
    }
}
AddressManager::~AddressManager()
{
    // the constructor heap-allocated every AtomicStruct and LastWriter
    for (auto atomic_ptr : atomicStructs) {
        delete atomic_ptr;
    }
    for (auto writer_ptr : logTable) {
        delete writer_ptr;
    }
}
// Translate a tester-internal location index into its real address.
Addr
AddressManager::getAddress(Location loc)
{
    // loc must index into the shuffled address map
    assert(loc >= 0);
    assert(loc < numAtomicLocs + numNormalLocs);
    return randAddressMap[loc];
}
// Uniformly pick an atomic location and open its location-selection phase.
AddressManager::Location
AddressManager::getAtomicLoc()
{
    Location picked = random() % numAtomicLocs;
    atomicStructs[picked]->startLocSelection();
    return picked;
}
// Pick a DRF-safe normal location for a LD, delegating to the
// AtomicStruct that owns this atomic location.
AddressManager::Location
AddressManager::getLoadLoc(Location atomic_loc)
{
    assert(atomic_loc >= 0);
    assert(atomic_loc < numAtomicLocs);
    return atomicStructs[atomic_loc]->getLoadLoc();
}
// Pick a DRF-safe normal location for a ST, delegating to the
// AtomicStruct that owns this atomic location.
AddressManager::Location
AddressManager::getStoreLoc(Location atomic_loc)
{
    assert(atomic_loc >= 0);
    assert(atomic_loc < numAtomicLocs);
    return atomicStructs[atomic_loc]->getStoreLoc();
}
// Close the location-selection phase opened by getAtomicLoc().
void
AddressManager::finishLocSelection(Location atomic_loc)
{
    assert(atomic_loc >= 0);
    assert(atomic_loc < numAtomicLocs);
    atomicStructs[atomic_loc]->endLocSelection();
}
// Return a previously selected normal location to its owning AtomicStruct
// once the episode using it has completed.
void
AddressManager::releaseLocation(Location atomic_loc, Location loc)
{
    assert(atomic_loc >= 0);
    assert(atomic_loc < numAtomicLocs);
    atomicStructs[atomic_loc]->releaseLoc(loc);
}
// Pretty-print the last-writer record for a location (for error reports).
std::string
AddressManager::printLastWriter(Location loc) const
{
    const LastWriter* writer = logTable[loc];
    return writer->print();
}
// ------------------- AtomicStruct --------------------------
// Set up the per-atomic-location structure managing the normal-location
// range [loc_begin, loc_end]. Partitions (1) and (3) start empty, so
// firstMark = 0 and secondMark = arraySize.
AddressManager::AtomicStruct::AtomicStruct(Location atomic_loc,
                                           Location loc_begin,
                                           Location loc_end)
    : atomicLoc(atomic_loc),
      locationBase(loc_begin),
      locArray(new Location[loc_end - loc_begin + 1]),
      firstMark(0),
      secondMark(loc_end - loc_begin + 1),
      requestCount(0)
{
    // the location range must have at least 1 location
    assert(loc_begin <= loc_end);
    arraySize = loc_end - loc_begin + 1;

    // fill locArray with the identity mapping and give every location its
    // initial property: <index in locArray, zero owners>
    for (int offset = 0; offset < arraySize; ++offset) {
        locArray[offset] = locationBase + offset;
        locProps.push_back(LocProperty(offset, 0));
    }
}
AddressManager::AtomicStruct::~AtomicStruct()
{
    // locArray is the only heap allocation owned by this structure
    delete[] locArray;
}
// Begin an episode's location-selection phase at this atomic location.
void
AddressManager::AtomicStruct::startLocSelection()
{
    // the three partitions must always be valid and ordered
    assert(firstMark >= 0);
    assert(firstMark <= secondMark);
    assert(secondMark <= arraySize);
    // the previous episode must have cleared its LD/ST selection map
    assert(loadStoreMap.empty());

    // an episode issues one Atomic_ACQ and one Atomic_REL at this location,
    // so two more return values become expected (the next two counts)
    expectedValues.insert(requestCount);
    expectedValues.insert(requestCount + 1);
    requestCount += 2;
}
// Pick a random location eligible for a LD, i.e., anything in partitions
// (2) or (3), and record the selection in loadStoreMap.
AddressManager::Location
AddressManager::AtomicStruct::getLoadLoc()
{
    assert(firstMark >= 0);
    assert(firstMark <= secondMark);
    assert(secondMark <= arraySize);

    if (firstMark == arraySize) {
        // partitions (2) and (3) are both empty: no LD candidate exists
        return INVALID_LOCATION;
    }

    // any entry of locArray[firstMark .. arraySize-1] is eligible
    int num_eligible = arraySize - firstMark;
    Location picked = locArray[firstMark + random() % num_eligible];

    // mark this location as LD-selected; operator[] value-initializes a
    // fresh entry to <false,false> before we set the LD bit, which matches
    // inserting LdStBits(true, false)
    loadStoreMap[picked].first = true;

    return picked;
}
// Pick a random location eligible for a ST, i.e., anything in partition
// (2) only, and record the selection in loadStoreMap.
AddressManager::Location
AddressManager::AtomicStruct::getStoreLoc()
{
    assert(firstMark >= 0);
    assert(firstMark <= secondMark);
    assert(secondMark <= arraySize);

    if (firstMark == secondMark) {
        // partition (2) is empty: no ST candidate exists
        return INVALID_LOCATION;
    }

    // any entry of locArray[firstMark .. secondMark-1] is eligible
    int num_eligible = secondMark - firstMark;
    Location picked = locArray[firstMark + random() % num_eligible];

    // mark this location as ST-selected; operator[] value-initializes a
    // fresh entry to <false,false> before we set the ST bit, which matches
    // inserting LdStBits(false, true)
    loadStoreMap[picked].second = true;

    return picked;
}
// for each entry in loadStoreMap,
//      if <LD_bit, ST_bit> == <1,0>
//          - if the location is in (2), then move it to (3)
//          - if the location is in (3), no move
//          - otherwise, throw an error
//      if <LD_bit, ST_bit> == <0,1> or <1,1>
//          - move it from (2) to (1)
void
AddressManager::AtomicStruct::endLocSelection()
{
    assert(firstMark >= 0);
    assert(firstMark <= secondMark);
    assert(secondMark <= arraySize);

    for (auto& it : loadStoreMap) {
        Location loc = it.first;
        LdStBits p = it.second;

        assert(loc >= locationBase && loc < locationBase + arraySize);
        LocProperty& loc_prop = locProps[loc - locationBase];

        if (p.first && !p.second) {
            // this location has been picked for LD(s) but not ST
            // it must be in either region (2) or (3)
            assert(inSecondRegion(loc_prop.first) ||
                   inThirdRegion(loc_prop.first));

            if (inSecondRegion(loc_prop.first)) {
                // there is no owner of this location yet
                assert(loc_prop.second == 0);

                // swap with the last location of (2) so that shrinking (2)
                // by one (secondMark--) grows (3) to include this loc
                // pick the last location in (2) to swap
                Location swapped_loc = locArray[secondMark - 1];
                LocProperty& swapped_loc_prop =
                                    locProps[swapped_loc - locationBase];

                // swap loc and swapped_loc
                swap(loc_prop, swapped_loc_prop);

                // then, expand (3)
                secondMark--;
            }
            // increment the location's number of owners
            loc_prop.second++;
        } else if (p.second) {
            // this location has been picked for ST(s) and/or LD(s)
            // it must be in region (2)
            assert(inSecondRegion(loc_prop.first) && loc_prop.second == 0);

            // swap with the first location of (2) so that growing (1)
            // by one (firstMark++) absorbs this loc
            // pick the first location in (2) to swap
            Location swapped_loc = locArray[firstMark];
            LocProperty& swapped_loc_prop =
                                locProps[swapped_loc - locationBase];

            // swap loc and swapped_loc
            swap(loc_prop, swapped_loc_prop);

            // then, expand (1)
            firstMark++;

            // increment the location's number of owners
            loc_prop.second++;
        } else {
            panic("Location in loadStoreMap but wasn't picked in any"
                  " action\n");
        }
    }

    // clear the ld_st_map so the next episode starts fresh
    loadStoreMap.clear();
}
// Release one normal location after its episode completed, moving it back
// to partition (2) once its last owner releases it:
//   - in (1): exactly one owner (ST-selected); swap it to the boundary and
//     shrink (1)
//   - in (3): >= 1 owners (LD-selected); only move back when the last owner
//     releases
//   - in (2): already released by another lane of the same wavefront
void
AddressManager::AtomicStruct::releaseLoc(Location loc)
{
    assert(loc >= locationBase && loc < locationBase + arraySize);

    LocProperty& loc_prop = locProps[loc - locationBase];

    if (inFirstRegion(loc_prop.first)) {
        // this location must have exactly 1 owner
        assert(loc_prop.second == 1);

        // pick the last location in region 1 to swap
        Location swapped_loc = locArray[firstMark - 1];
        LocProperty& swapped_loc_prop = locProps[swapped_loc - locationBase];

        // swap loc and swapped_loc
        swap(loc_prop, swapped_loc_prop);

        // then shrink (1)
        firstMark--;

        // reset the location's number of owners
        loc_prop.second = 0;
    } else if (inThirdRegion(loc_prop.first)) {
        // this location must have at least 1 owner
        assert(loc_prop.second >= 1);

        if (loc_prop.second == 1) {
            // last owner: move the location back to (2)
            // pick the first location in region 3 to swap
            Location swapped_loc = locArray[secondMark];
            LocProperty& swapped_loc_prop =
                                locProps[swapped_loc - locationBase];

            // swap loc and swapped_loc
            swap(loc_prop, swapped_loc_prop);

            // then shrink (3)
            secondMark++;
        }
        // decrement the loc's number of owners
        loc_prop.second--;
    } else {
        // someone else must have already reset this counter
        assert(inSecondRegion(loc_prop.first) && loc_prop.second == 0);
    }
}
/**
 * Check an atomic response value against the set of outstanding expected
 * values and consume it on success. Returns false (after warning with the
 * remaining expected values) if val was never expected or was already
 * consumed.
 *
 * Fix: the original range-for variable was also named 'val', shadowing the
 * function parameter; renamed so the parameter stays visible in this scope.
 */
bool
AddressManager::AtomicStruct::isExpectedValue(Value val)
{
    ExpectedValueSet::iterator it = expectedValues.find(val);
    if (it == expectedValues.end()) {
        std::stringstream exp_val_ss;
        for (auto& exp_val : expectedValues) {
            exp_val_ss << " " << exp_val;
        }
        warn("Expected return values are:\n\t%s\n", exp_val_ss.str());
        return false;
    }
    // erase this value b/c it's done (each expected value is consumed once)
    expectedValues.erase(it);
    return true;
}
// Exchange two locations' slots in locArray and keep each property's
// recorded index consistent with its new slot.
void
AddressManager::AtomicStruct::swap(LocProperty& prop_1, LocProperty& prop_2)
{
    int idx_1 = prop_1.first;
    int idx_2 = prop_2.first;

    // exchange the two entries in locArray
    Location tmp_loc = locArray[idx_1];
    locArray[idx_1] = locArray[idx_2];
    locArray[idx_2] = tmp_loc;

    // update each property's recorded index
    prop_1.first = idx_2;
    prop_2.first = idx_1;
}
// ------------------ log table ---------------------
// Record the most recent writer of a location: which thread (and CU, -1 for
// CPU threads), in which episode, wrote which value, and at what tick.
void
AddressManager::updateLogTable(Location loc, int thread_id, int episode_id,
                               Value new_value, Tick cur_tick, int cu_id)
{
    assert(loc >= 0 && loc < numAtomicLocs + numNormalLocs);
    logTable[loc]->update(thread_id, cu_id, episode_id, new_value, cur_tick);
}
// Return the value most recently logged for this location (what a
// well-synchronized load is expected to observe).
AddressManager::Value
AddressManager::getLoggedValue(Location loc) const
{
    assert(loc >= 0 && loc < numAtomicLocs + numNormalLocs);
    return logTable[loc]->getLastStoredValue();
}
// Validate an atomic response against the owning AtomicStruct's
// expected-value set; loc must be an atomic location.
bool
AddressManager::validateAtomicResp(Location loc, Value ret_val)
{
    assert(loc >= 0 && loc < numAtomicLocs);
    return atomicStructs[loc]->isExpectedValue(ret_val);
}

View File

@@ -0,0 +1,274 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_
#define CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "base/types.hh"
#include "sim/eventq.hh"
/*
* --- AddressManager has 3 main tasks ---
* (1) generate DRF request sequences
* (2) maintain internal log table
* (3) validate return values against ones in the log table
*
* A location is an abstract index of a unique real address.
* It's used internally within the tester only.
* randAddressMap has the mapping between a location and its real address.
*
* A value is an integer that a location in real memory can store.
* for now, we assume a value is 4-byte
*
* The location range (randAddressMap) has two distinct parts:
* Atomic locations: in the 1st part of randAddressMap &
* Non-atomic locations (or just locations): in the 2nd part
*/
/*
* --- DRF request sequence generation ---
* Each lane of an episode starts selecting its location by calling:
* (1) getAtomicLoc
* (2) getLoadLoc/getStoreLoc
* (3) finishLocSelection
*
 * Each lane of an episode completes its execution by calling:
* releaseLocation for all locations it selected
*/
/*
* --- Internal structures ---
* There are multiple atomic structures, each of which corresponds
* to an atomic location.
*
* Each atomic structure manages a distinct range of locations in locArray
* This array is partitioned into 3 parts that are used to select locations
* for LDs and STs. Here is the location selecting rule:
* | (1) | (2) | (3) |
* - all locations in (1) cannot be picked for any LD and ST action
* - all locations in (2) can be picked for either LD or ST action
* - all locations in (3) can be picked for LD action only
*
* We maintain the 3 parts by 2 indices firstMark and secondMark.
* As locations are moved between partitions, both indices are updated
* accordingly.
* [0 .. firstMark-1] part (1)
* [firstMark .. secondMark-1] part (2)
* [secondMark .. arraySize-1] part (3)
*
* Each location has its context/property. locProps maintains
* contexts/properties of all locations. Context/property includes
* - current index of a location in locArray
* - the number of owners who are currently using the location
*
* To guarantee DRF constraints, the following conditions must hold
* - all locations in (1) have exactly 1 owner
* - all locations in (2) have exactly 0 owner
* - all locations in (3) have at least 1 owner
* - A LD request can randomly pick any location in (2) & (3)
* - A ST request can randomly pick any location in (2)
*
* loadStoreMap maintains all locations already selected for LDs/STs so far
*
* When endLocSelection is called (i.e., we've picked all locations for an
* episode), we need to move each selected location to its right partition.
* if LD_bit == 1 && ST_bit == 0 (i.e., picked for LDs), then move the
* location to (3) -> future LDs can pick it.
* if LD_bit == 0 && ST_bit == 1, then move the location to (1) -> NO future
* action can pick it until this episode is done.
* if LD_bit == 1 && ST_bit == 1, then move the location to (1) -> NO future
* action can pick it until this episode is done.
* clear the loadStoreMap
*/
/*
 * Central bookkeeper for the protocol tester: maps tester-internal
 * locations to real addresses, hands out DRF-safe locations to episodes,
 * logs last writers, and validates atomic return values. See the
 * file-level comments above for the full partitioning scheme.
 */
class AddressManager
{
  public:
    AddressManager(int n_atomic_locs, int numNormalLocsPerAtomic);
    ~AddressManager();

    typedef int32_t Value;
    typedef int32_t Location;

    // return the unique address mapped to a location
    Addr getAddress(Location loc);
    // return a unique atomic location & start picking locations
    Location getAtomicLoc();
    // return a random location for LD
    Location getLoadLoc(Location atomic_loc);
    // return a random location for ST
    Location getStoreLoc(Location atomic_loc);
    // finish picking locations
    void finishLocSelection(Location atomic_loc);
    // an episode is done, release location I've picked
    void releaseLocation(Location atomic_loc, Location loc);
    // update a log table entry with a given set of values
    void updateLogTable(Location loc, int threadId, int episodeId,
                        Value new_value, Tick curTick, int cuId = -1);
    // return the current value in the log table
    Value getLoggedValue(Location loc) const;
    // validate atomic response
    bool validateAtomicResp(Location loc, Value ret_val);

    std::string printLastWriter(Location loc) const;

    // sentinel constants (both defined as -1 in address_manager.cc)
    static const int INVALID_VALUE;
    static const int INVALID_LOCATION;

  private:
    // record of the most recent store to one location: who wrote it
    // (thread/CU/episode), what value, and at what tick
    class LastWriter
    {
      public:
        LastWriter()
            : threadId(-1), cuId(-1), episodeId(-1), value(0),
              writeTick(0)
        { }

        const std::string print() const
        {
            return "(GpuThread ID " + std::to_string(threadId) +
                   ", CU ID " + std::to_string(cuId) +
                   ", Episode ID " + std::to_string(episodeId) +
                   ", Value " + std::to_string(value) +
                   ", Tick " + std::to_string(writeTick) +
                   ")";
        }

        void update(int _thread, int _cu, int _episode, Value _value,
                    Tick _tick)
        {
            threadId = _thread;
            cuId = _cu;
            episodeId = _episode;
            value = _value;
            writeTick = _tick;
        }

        Value getLastStoredValue() const { return value; }

      private:
        int threadId;
        int cuId;
        int episodeId;
        Value value;
        Tick writeTick;
    };

    // per-atomic-location structure that manages its range of normal
    // locations via the 3-partition scheme described in the file comments
    class AtomicStruct
    {
      public:
        AtomicStruct(Location atom_loc, Location loc_begin, Location loc_end);
        ~AtomicStruct();

        // functions picking locations for LD/ST/ATOMIC ops
        void startLocSelection();
        Location getLoadLoc();
        Location getStoreLoc();
        void endLocSelection();

        // an episode completed its actions
        // return locations to their correct positions
        void releaseLoc(Location loc);
        // is the value what we expect?
        bool isExpectedValue(Value val);

      private:
        Location atomicLoc;
        Location locationBase;

        // array storing all locations this structure is managing
        Location* locArray;
        // partition boundaries: [0..firstMark-1] = (1),
        // [firstMark..secondMark-1] = (2), [secondMark..arraySize-1] = (3)
        int firstMark, secondMark;
        int arraySize;

        // a vector of location's properties:
        // <current index in locArray, number of owners>
        typedef std::pair<int, int> LocProperty;
        typedef std::vector<LocProperty> LocPropTable;
        LocPropTable locProps;

        // a temporary map of location and its LD/ST selection
        // pair is <picked-for-LD bit, picked-for-ST bit>
        typedef std::pair<bool, bool> LdStBits;
        typedef std::unordered_map<Location, LdStBits> LdStMap;
        LdStMap loadStoreMap;

        // number of atomic requests at this location so far
        int requestCount;
        // a set of expected values
        // when we request the first n atomic ops, we expect to receive n
        // return values from [0 .. n-1]
        typedef std::unordered_set<Value> ExpectedValueSet;
        ExpectedValueSet expectedValues;

        // swap two locations in locArray
        void swap(LocProperty& prop_1, LocProperty& prop_2);

        bool inFirstRegion(int idx) const
        {
            return (idx >= 0 && idx < firstMark);
        }
        bool inSecondRegion(int idx) const
        {
            return (idx >= firstMark && idx < secondMark);
        }
        bool inThirdRegion(int idx) const
        {
            return (idx >= secondMark && idx < arraySize);
        }
    };

    // number of atomic locations
    int numAtomicLocs;
    // number of normal/non-atomic locations per atomic structure
    int numLocsPerAtomic;
    // total number of non-atomic locations
    int numNormalLocs;

    // location - address mapping
    typedef std::vector<Addr> AddressMap;
    AddressMap randAddressMap;

    // a list of atomic structures
    typedef std::vector<AtomicStruct*> AtomicStructTable;
    AtomicStructTable atomicStructs;

    // internal log table
    typedef std::vector<LastWriter*> LogTable;
    LogTable logTable;
};
#endif /* CPU_TESTERS_PROTOCOL_TESTER_ADDRESS_MANAGER_HH_ */

View File

@@ -0,0 +1,123 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "cpu/testers/gpu_ruby_test/cpu_thread.hh"
#include "debug/ProtocolTest.hh"
// A CpuThread models a scalar CPU-side tester thread on top of the generic
// GpuThread machinery.
CpuThread::CpuThread(const Params &p)
    :GpuThread(p)
{
    threadName = "CpuThread(Thread ID " + std::to_string(threadId) + ")";
    threadEvent.setDesc("CpuThread tick");
    // CPU threads are scalar: exactly one lane per thread
    assert(numLanes == 1);
}
// Params factory hook: constructs the SimObject from its generated params.
CpuThread*
CpuThreadParams::create() const
{
    return new CpuThread(*this);
}
// Issue the current LOAD action. CPU-side issue is a stub for now; the
// asserts capture the invariants a future implementation must uphold.
void
CpuThread::issueLoadOps()
{
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::LOAD);
    // we should not have any outstanding fence or atomic op at this point
    assert(pendingFenceCount == 0);
    assert(pendingAtomicCount == 0);

    fatal("CpuThread::issueLoadOps - not yet implemented");
}
// Issue the current STORE action. CPU-side issue is a stub for now; the
// asserts capture the invariants a future implementation must uphold.
void
CpuThread::issueStoreOps()
{
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::STORE);
    // we should not have any outstanding fence or atomic op at this point
    assert(pendingFenceCount == 0);
    assert(pendingAtomicCount == 0);

    fatal("CpuThread::issueStoreOps - not yet implemented");
}
// Issue the current ATOMIC action. CPU-side issue is a stub for now; the
// asserts capture the invariants a future implementation must uphold.
void
CpuThread::issueAtomicOps()
{
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::ATOMIC);
    // we should not have any outstanding ops at this point
    assert(pendingFenceCount == 0);
    assert(pendingLdStCount == 0);
    assert(pendingAtomicCount == 0);

    fatal("CpuThread::issueAtomicOps - not yet implemented");
}
// Acquire is a no-op for CPU threads; only the episode-state invariants
// are checked.
void
CpuThread::issueAcquireOp()
{
    DPRINTF(ProtocolTest, "Issuing Acquire Op ...\n");

    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::ACQUIRE);
    // we should not have any outstanding ops at this point
    assert(pendingFenceCount == 0);
    assert(pendingLdStCount == 0);
    assert(pendingAtomicCount == 0);

    // no-op: Acquire does not apply to CPU threads
}
// Release is a no-op for CPU threads; only the episode-state invariants
// are checked.
void
CpuThread::issueReleaseOp()
{
    DPRINTF(ProtocolTest, "Issuing Release Op ...\n");

    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::RELEASE);
    // we should not have any outstanding ops at this point
    assert(pendingFenceCount == 0);
    assert(pendingLdStCount == 0);
    assert(pendingAtomicCount == 0);

    // no-op: Release does not apply to CPU threads
}
// Memory-response handler; not implemented for CPU threads yet.
void
CpuThread::hitCallback(PacketPtr pkt)
{
    fatal("CpuThread::hitCallback - not yet implemented");
}

View File

@@ -0,0 +1,61 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_
#define CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
#include "params/CpuThread.hh"
#include "sim/clocked_object.hh"
// A CpuThread is a scalar tester thread driving the protocol tester from
// the CPU side. LD/ST/Atomic issue and hitCallback are stubs (they fatal
// in cpu_thread.cc); Acquire/Release are no-ops for CPU threads.
class CpuThread : public GpuThread
{
  public:
    typedef CpuThreadParams Params;
    CpuThread(const Params &p);
    virtual ~CpuThread() = default;

    typedef AddressManager::Location Location;
    typedef AddressManager::Value Value;

    // invoked when a memory response returns (not yet implemented)
    void hitCallback(PacketPtr pkt);

  protected:
    // issue hooks driven by the GpuThread episode state machine
    // NOTE(review): these presumably override virtuals declared in
    // GpuThread -- confirm against gpu_thread.hh before adding 'override'
    void issueLoadOps();
    void issueStoreOps();
    void issueAtomicOps();
    void issueAcquireOp();
    void issueReleaseOp();
};
#endif /* CPU_TESTERS_PROTOCOL_TESTER_CPU_THREAD_HH_ */

View File

@@ -0,0 +1,321 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "cpu/testers/gpu_ruby_test/episode.hh"
#include <fstream>
#include <unordered_set>
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
// An Episode is one data-race-free unit of work for a thread: an action
// sequence Atomic-Acquire-{LD/ST...}-Release-Atomic, with per-lane
// locations chosen so no data race is possible.
Episode::Episode(ProtocolTester* _tester, GpuThread* _thread, int num_loads,
                 int num_stores)
    : tester(_tester),
      thread(_thread),
      numLoads(num_loads),
      numStores(num_stores),
      nextActionIdx(0)
{
    assert(tester && thread);

    // globally unique episode id, handed out by the tester
    episodeId = tester->getNextEpisodeID();
    numLanes = thread->getNumLanes();
    assert(numLanes > 0);

    addrManager = tester->getAddressManager();
    assert(addrManager);

    // no lane has picked its atomic location yet
    atomicLocs.resize(numLanes, AddressManager::INVALID_LOCATION);

    // generate a sequence of actions
    initActions();
    isActive = true;

    // NOTE(review): DPRINTFN prints regardless of any debug flag; consider
    // DPRINTF(ProtocolTest, ...) if this trace line should be gated
    DPRINTFN("Episode %d\n", episodeId);
}
Episode::~Episode()
{
    // every Action was heap-allocated by initActions()
    for (auto action_ptr : actions) {
        assert(action_ptr);
        delete action_ptr;
    }
}
// Return the next pending action without consuming it, or nullptr once
// the episode has run out of actions.
const Episode::Action*
Episode::peekCurAction() const
{
    return (nextActionIdx < actions.size()) ? actions[nextActionIdx]
                                            : nullptr;
}
// Consume the current action; only legal while actions remain.
void
Episode::popAction()
{
    assert(nextActionIdx < actions.size());
    nextActionIdx++;
}
// Build the episode's action sequence
// (Atomic, Acquire, random LD/ST mix, Release, Atomic) and then, per lane,
// pick an atomic location and a DRF-checked normal location for every
// LD/ST action.
void
Episode::initActions()
{
    // first, push Atomic & then Acquire action
    actions.push_back(new Action(Action::Type::ATOMIC, numLanes));
    actions.push_back(new Action(Action::Type::ACQUIRE, numLanes));

    // second, push a number of LD/ST actions in a random interleaving
    int num_loads = numLoads;
    int num_stores = numStores;
    while ((num_loads + num_stores) > 0) {
        switch (random() % 2) {
            case 0: // Load
                if (num_loads > 0) {
                    actions.push_back(new Action(Action::Type::LOAD,
                                                 numLanes));
                    num_loads--;
                }
                break;
            case 1: // Store
                if (num_stores > 0) {
                    actions.push_back(new Action(Action::Type::STORE,
                                                 numLanes));
                    num_stores--;
                }
                break;
            default:
                assert(false);
        }
    }

    // last, push a Release & then an Atomic action
    actions.push_back(new Action(Action::Type::RELEASE, numLanes));
    actions.push_back(new Action(Action::Type::ATOMIC, numLanes));

    // for each lane, pick a list of locations
    Location normal_loc;

    for (int lane = 0; lane < numLanes; ++lane) {
        normal_loc = AddressManager::INVALID_LOCATION;

        // first, we select atomic loc for this lane
        // atomic loc for this lane should not have been picked yet
        assert(atomicLocs[lane] == AddressManager::INVALID_LOCATION);
        // pick randomly an atomic location
        atomicLocs[lane] = addrManager->getAtomicLoc();
        assert(atomicLocs[lane] >= 0);

        // go through each action in this lane and set its location
        for (Action* action : actions) {
            assert(action);

            switch (action->getType()) {
                case Action::Type::ATOMIC:
                    // both atomic actions target the lane's atomic location
                    action->setLocation(lane, atomicLocs[lane]);
                    break;
                case Action::Type::LOAD:
                    // pick randomly a normal location
                    // (may be INVALID if nothing is LD-eligible right now)
                    normal_loc = addrManager->
                                            getLoadLoc(atomicLocs[lane]);
                    assert(normal_loc >= AddressManager::INVALID_LOCATION);

                    if (normal_loc != AddressManager::INVALID_LOCATION) {
                        // check DRF against other threads and prior lanes
                        if (!tester->checkDRF(atomicLocs[lane],
                                              normal_loc, false) ||
                            !this->checkDRF(atomicLocs[lane], normal_loc,
                                            false, lane)) {
                            panic("GpuTh %d - Data race detected. STOPPED!\n",
                                  thread->getGpuThreadId());
                        }
                    }

                    action->setLocation(lane, normal_loc);
                    break;
                case Action::Type::STORE:
                    // pick randomly a normal location
                    // (may be INVALID if nothing is ST-eligible right now)
                    normal_loc = addrManager->
                                            getStoreLoc(atomicLocs[lane]);
                    assert(normal_loc >= AddressManager::INVALID_LOCATION);

                    if (normal_loc != AddressManager::INVALID_LOCATION) {
                        // check DRF against other threads and prior lanes
                        if (!tester->checkDRF(atomicLocs[lane],
                                              normal_loc, true) ||
                            !this->checkDRF(atomicLocs[lane], normal_loc,
                                            true, lane)) {
                            panic("GpuTh %d - Data race detected. STOPPED!\n",
                                  thread->getGpuThreadId());
                        }
                    }

                    action->setLocation(lane, normal_loc);
                    break;
                case Action::Type::ACQUIRE:
                case Action::Type::RELEASE:
                    // no op: fences carry no location
                    break;
                default:
                    panic("Invalid action type\n");
            }
        }

        addrManager->finishLocSelection(atomicLocs[lane]);
    }
}
// Called when the episode has finished all actions: release every unique
// normal location each lane used back to the address manager, then mark
// the episode inactive.
void
Episode::completeEpisode()
{
    // release all locations this episode has picked and used
    Location atomic_loc, normal_loc;

    for (int lane = 0; lane < numLanes; ++lane) {
        atomic_loc = AddressManager::INVALID_LOCATION;
        normal_loc = AddressManager::INVALID_LOCATION;
        // set of unique normal locations this lane touched
        std::unordered_set<Location> unique_loc_set;

        for (Action* action : actions) {
            assert(action);

            if (action->isAtomicAction()) {
                if (atomic_loc == AddressManager::INVALID_LOCATION) {
                    atomic_loc = action->getLocation(lane);
                } else {
                    // both atomic ops in the same lane must be
                    // at the same location
                    assert(atomic_loc == action->getLocation(lane));
                }
            } else if (!action->isMemFenceAction()) {
                // LD/ST action; the leading atomic must have been seen first
                assert(atomic_loc >= 0);
                normal_loc = action->getLocation(lane);

                if (normal_loc >= 0)
                    unique_loc_set.insert(normal_loc);
            }
        }

        // each unique loc can be released only once
        for (Location loc : unique_loc_set)
            addrManager->releaseLocation(atomic_loc, loc);
    }

    // this episode is no longer active
    isActive = false;
}
// Intra-episode DRF check across lanes: for every lane with index
// < max_lane that shares this atomic location, a new ST to 'loc' conflicts
// with any earlier LD/ST to 'loc', and a new LD conflicts with any earlier
// ST to 'loc'. Returns false (after a warning) on a race.
bool
Episode::checkDRF(Location atomic_loc, Location loc, bool isStore,
                  int max_lane) const
{
    assert(atomic_loc != AddressManager::INVALID_LOCATION);
    assert(loc != AddressManager::INVALID_LOCATION);
    assert(max_lane <= numLanes);

    for (int lane = 0; lane < max_lane; ++lane) {
        if (atomic_loc == atomicLocs[lane]) {
            for (const Action* action : actions) {
                // only LD/ST actions carry normal locations
                if (!action->isAtomicAction() &&
                    !action->isMemFenceAction()) {
                    if (isStore && loc == action->getLocation(lane)) {
                        warn("ST at location %d races against thread %d\n",
                             loc, thread->getGpuThreadId());
                        return false;
                    } else if (!isStore &&
                               action->getType() == Action::Type::STORE &&
                               loc == action->getLocation(lane)) {
                        warn("LD at location %d races against thread %d\n",
                             loc, thread->getGpuThreadId());
                        return false;
                    }
                }
            }
        }
    }

    return true;
}
// -------------------- Action class ----------------------------
Episode::Action::Action(Type t, int num_lanes)
    : type(t),
      numLanes(num_lanes)
{
    assert(numLanes > 0);
    // every lane starts out with no location assigned
    locations.assign(numLanes, AddressManager::INVALID_LOCATION);
}
void
Episode::Action::setLocation(int lane, Location loc)
{
    // record the target location for a single lane of this action;
    // the lane index must be within this action's width
    assert(lane >= 0 && lane < numLanes);
    locations[lane] = loc;
}
AddressManager::Location
Episode::Action::getLocation(int lane) const
{
    // look up the location previously assigned to this lane (may be
    // INVALID_LOCATION if the lane is inactive for this action)
    assert(lane >= 0 && lane < numLanes);
    return locations[lane];
}
bool
Episode::Action::isAtomicAction() const
{
    // true only for read-modify-write actions
    return type == Type::ATOMIC;
}
bool
Episode::Action::isMemFenceAction() const
{
    // acquire and release are the two fence flavors used by the tester
    return (type == Type::ACQUIRE) || (type == Type::RELEASE);
}
const std::string
Episode::Action::printType() const
{
    // human-readable name of this action's type, used in debug/error logs
    switch (type) {
      case Type::ACQUIRE:
        return "ACQUIRE";
      case Type::RELEASE:
        return "RELEASE";
      case Type::ATOMIC:
        return "ATOMIC";
      case Type::LOAD:
        return "LOAD";
      case Type::STORE:
        return "STORE";
      default:
        panic("Invalid action type\n");
    }
}

View File

@@ -0,0 +1,126 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_
#define CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_
#include <vector>
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
class ProtocolTester;
class GpuThread;
// An Episode is one self-contained burst of memory activity generated by a
// single tester thread: a randomized mix of loads/stores bracketed by
// atomics and fences, all drawn against locations leased from the
// AddressManager.
class Episode
{
  public:
    typedef AddressManager::Location Location;
    typedef AddressManager::Value Value;

    // One step of an episode: a fence (ACQUIRE/RELEASE), an ATOMIC, or a
    // per-lane LOAD/STORE. An action holds one location per lane
    // (INVALID_LOCATION for lanes that sit the action out).
    class Action {
      public:
        enum class Type {
            ACQUIRE,
            RELEASE,
            ATOMIC,
            LOAD,
            STORE,
        };

        Action(Type t, int num_lanes);
        ~Action() {}

        Type getType() const { return type; }
        // assign/read the per-lane target location
        void setLocation(int lane, Location loc);
        Location getLocation(int lane) const;
        bool isAtomicAction() const;
        // true for ACQUIRE and RELEASE
        bool isMemFenceAction() const;
        // type name as a string, for logs and error dumps
        const std::string printType() const;

      private:
        Type type;
        int numLanes;
        // per-lane target locations, sized to numLanes
        typedef std::vector<Location> LocationList;
        LocationList locations;
    };

    Episode(ProtocolTester* tester, GpuThread* thread, int num_loads,
            int num_stores);
    ~Episode();

    // return episode id
    int getEpisodeId() const { return episodeId; }
    // return the action at the head of the action queue
    const Action* peekCurAction() const;
    // pop the action at the head of the action queue
    void popAction();
    // check if there is more action to be issued in this episode
    bool hasMoreActions() const { return nextActionIdx < actions.size();}
    // complete this episode by releasing all locations & updating st effects
    void completeEpisode();
    // check if this episode is executing
    bool isEpsActive() const { return isActive; }
    // check if the input episode and this one have any data race
    bool checkDRF(Location atomic_loc, Location loc, bool isStore,
                  int max_lane) const;

  private:
    // pointers to tester, thread and address amanger structures
    // (non-owning; the tester owns these objects)
    ProtocolTester *tester;
    GpuThread *thread;
    AddressManager *addrManager;

    // a unique episode id
    int episodeId;

    // list of actions in this episode (owned; freed by the destructor,
    // presumably — confirm in episode.cc)
    typedef std::vector<Action*> ActionList;
    ActionList actions;

    // list of atomic locations picked for this episode, one per lane
    typedef std::vector<Location> AtomicLocationList;
    AtomicLocationList atomicLocs;

    // is a thread running this episode?
    bool isActive;

    // episode length = num_loads + num_stores
    int numLoads;
    int numStores;

    // index of the next action in actions
    int nextActionIdx;

    // number of lanes in this thread (1 for a CPU thread, wavefront
    // width for a GPU thread)
    int numLanes;

    // randomly generate actions in this episode
    void initActions();
};
#endif /* CPU_TESTERS_PROTOCOL_TESTER_EPISODE_HH_ */

View File

@@ -0,0 +1,430 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
#include <fstream>
#include "debug/ProtocolTest.hh"
// All tester/port/address-manager pointers start out null; they are wired
// up later through attachGpuThreadToPorts() before the thread runs.
GpuThread::GpuThread(const Params &p)
    : ClockedObject(p),
      threadEvent(this, "GpuThread tick"),
      deadlockCheckEvent(this),
      threadId(p.thread_id),
      numLanes(p.num_lanes),
      tester(nullptr), addrManager(nullptr), port(nullptr),
      scalarPort(nullptr), sqcPort(nullptr), curEpisode(nullptr),
      curAction(nullptr), pendingLdStCount(0), pendingFenceCount(0),
      pendingAtomicCount(0), lastActiveCycle(Cycles(0)),
      deadlockThreshold(p.deadlock_threshold)
{
}
GpuThread::~GpuThread()
{
    // the thread owns every episode it has ever issued; reclaim them all
    for (std::size_t i = 0; i < episodeHistory.size(); ++i) {
        assert(episodeHistory[i] != nullptr);
        delete episodeHistory[i];
    }
}
void
GpuThread::wakeup()
{
    // Main tick of the thread. It runs when either:
    //  - hitCallback was called (a response came back), or
    //  - a new episode was created.
    // It tries to issue the next ready action; failing that, it retires
    // the current episode once no actions and no requests remain.

    // check if this is the first episode in this thread
    if (curEpisode == nullptr) {
        issueNewEpisode();
        assert(curEpisode);
    }

    if (isNextActionReady()) {
        // isNextActionReady should check if the action list is empty
        assert(curAction != nullptr);

        // issue the next action
        issueNextAction();
    } else {
        // check for completion of the current episode
        // completion = no outstanding requests + not having more actions
        if (!curEpisode->hasMoreActions() &&
            pendingLdStCount == 0 &&
            pendingFenceCount == 0 &&
            pendingAtomicCount == 0) {

            curEpisode->completeEpisode();

            // check if it's time to stop the tester
            if (tester->checkExit()) {
                // no more event is scheduled for this thread
                return;
            }

            // issue the next episode
            issueNewEpisode();
            assert(curEpisode);

            // now we get a new episode
            // let's wake up the thread in the next cycle
            if (!threadEvent.scheduled()) {
                scheduleWakeup();
            }
        }
        // otherwise: requests are still in flight; hitCallback will
        // re-schedule this wakeup when a response arrives
    }
}
void
GpuThread::scheduleWakeup()
{
    // callers must guarantee the tick event is not already pending
    assert(!threadEvent.scheduled());
    // fire on this object's next clock edge
    schedule(threadEvent, nextCycle());
}
void
GpuThread::scheduleDeadlockCheckEvent()
{
    // after this first schedule, the deadlock event re-schedules itself
    // from checkDeadlock(); it must not already be pending here
    assert(!deadlockCheckEvent.scheduled());
    schedule(deadlockCheckEvent, nextCycle());
}
// Late binding of this thread to the tester and its memory ports.
//
// NOTE(review): the header declares the trailing parameters in the order
// (_sqcPort = nullptr, _scalarPort = nullptr), while this definition names
// them (_scalarPort, _sqcPort). The compiler matches only by type, so a
// caller passing ports positionally per the declaration would silently get
// the two ports swapped here. Confirm against the call sites and unify the
// parameter order between declaration and definition.
void
GpuThread::attachGpuThreadToPorts(ProtocolTester *_tester,
                                  ProtocolTester::SeqPort *_port,
                                  ProtocolTester::SeqPort *_scalarPort,
                                  ProtocolTester::SeqPort *_sqcPort)
{
    tester = _tester;
    port = _port;
    scalarPort = _scalarPort;
    sqcPort = _sqcPort;

    // the tester and the main data port are mandatory; the scalar and
    // SQC ports may be null (e.g. for CPU threads)
    assert(tester && port);
    addrManager = tester->getAddressManager();
    assert(addrManager);
}
void
GpuThread::issueNewEpisode()
{
    // split the fixed episode length randomly between loads and stores
    const int episode_len = tester->getEpisodeLength();
    const int num_reg_loads = random() % episode_len;
    const int num_reg_stores = episode_len - num_reg_loads;

    // the newest episode becomes this thread's current episode
    curEpisode = new Episode(tester, this, num_reg_loads, num_reg_stores);
    episodeHistory.push_back(curEpisode);
}
bool
GpuThread::isNextActionReady()
{
    // Peek at the head of the current episode's action queue and decide
    // whether it may issue now under the tester's wait-count rules (each
    // action class must wait for certain prior requests to drain).
    // Side effect: sets curAction to the head action when one exists.
    if (!curEpisode->hasMoreActions()) {
        return false;
    } else {
        curAction = curEpisode->peekCurAction();

        switch(curAction->getType()) {
            case Episode::Action::Type::ATOMIC:
                // an atomic action must wait for all previous requests
                // to complete
                if (pendingLdStCount == 0 &&
                    pendingFenceCount == 0 &&
                    pendingAtomicCount == 0) {
                    return true;
                }

                return false;
            case Episode::Action::Type::ACQUIRE:
                // we should not see any outstanding ld_st or fence here
                assert(pendingLdStCount == 0 &&
                       pendingFenceCount == 0);

                // an acquire action must wait for all previous atomic
                // requests to complete
                if (pendingAtomicCount == 0) {
                    return true;
                }

                return false;
            case Episode::Action::Type::RELEASE:
                // we should not see any outstanding atomic or fence here
                assert(pendingAtomicCount == 0 &&
                       pendingFenceCount == 0);

                // a release action must wait for all previous ld/st
                // requests to complete
                if (pendingLdStCount == 0) {
                    return true;
                }

                return false;
            case Episode::Action::Type::LOAD:
            case Episode::Action::Type::STORE:
                // we should not see any outstanding atomic here
                assert(pendingAtomicCount == 0);

                // can't issue if there is a pending fence
                if (pendingFenceCount > 0) {
                    return false;
                }

                // a Load or Store is ready if it doesn't overlap
                // with any outstanding request
                for (int lane = 0; lane < numLanes; ++lane) {
                    Location loc = curAction->getLocation(lane);

                    // INVALID_LOCATION marks an inactive lane
                    if (loc != AddressManager::INVALID_LOCATION) {
                        Addr addr = addrManager->getAddress(loc);

                        if (outstandingLoads.find(addr) !=
                            outstandingLoads.end()) {
                            return false;
                        }

                        if (outstandingStores.find(addr) !=
                            outstandingStores.end()) {
                            return false;
                        }

                        if (outstandingAtomics.find(addr) !=
                            outstandingAtomics.end()) {
                            // this is not an atomic action, so the address
                            // should not be in outstandingAtomics list
                            assert(false);
                        }
                    }
                }

                return true;
            default:
                panic("The tester got an invalid action\n");
        }
    }
}
void
GpuThread::issueNextAction()
{
switch(curAction->getType()) {
case Episode::Action::Type::ATOMIC:
issueAtomicOps();
break;
case Episode::Action::Type::ACQUIRE:
issueAcquireOp();
break;
case Episode::Action::Type::RELEASE:
issueReleaseOp();
break;
case Episode::Action::Type::LOAD:
issueLoadOps();
break;
case Episode::Action::Type::STORE:
issueStoreOps();
break;
default:
panic("The tester got an invalid action\n");
}
// the current action has been issued, pop it from the action list
curEpisode->popAction();
lastActiveCycle = curCycle();
// we may be able to schedule the next action
// just wake up this thread in the next cycle
if (!threadEvent.scheduled()) {
scheduleWakeup();
}
}
void
GpuThread::addOutstandingReqs(OutstandingReqTable& req_table, Addr address,
                              int lane, Location loc, Value stored_val)
{
    // Record an in-flight request so later actions to the same address are
    // held back and the eventual response can be validated and popped.
    //
    // operator[] default-constructs an empty request list on first use of
    // an address, which replaces the original find-then-insert dance with
    // a single lookup; emplace_back builds the OutstandingReq in place.
    req_table[address].emplace_back(lane, loc, stored_val, curCycle());
}
GpuThread::OutstandingReq
GpuThread::popOutstandingReq(OutstandingReqTable& req_table, Addr addr)
{
    // Remove and return one pending request for addr (most recently added
    // first). The address must currently have at least one request.
    auto entry = req_table.find(addr);
    assert(entry != req_table.end());

    OutstandingReqList& reqs = entry->second;
    assert(!reqs.empty());

    // detach the most recently added request
    OutstandingReq popped = reqs.back();
    reqs.pop_back();

    // drop the per-address list once it drains so the table only holds
    // addresses that truly have requests in flight
    if (reqs.empty())
        req_table.erase(entry);

    return popped;
}
void
GpuThread::validateAtomicResp(Location loc, int lane, Value ret_val)
{
    // a correct atomic response needs no further work
    if (addrManager->validateAtomicResp(loc, ret_val))
        return;

    // mismatch: report it and terminate the test with an error dump
    Addr addr = addrManager->getAddress(loc);
    std::stringstream ss;
    ss << threadName << ": Atomic Op returned unexpected value\n"
       << "\tEpisode " << curEpisode->getEpisodeId() << "\n"
       << "\tLane ID " << lane << "\n"
       << "\tAddress " << printAddress(addr) << "\n"
       << "\tAtomic Op's return value " << ret_val << "\n";

    // print out basic info
    warn("%s\n", ss.str());

    // TODO add more detailed info

    // dump all error info and exit the simulation
    tester->dumpErrorLog(ss);
}
void
GpuThread::validateLoadResp(Location loc, int lane, Value ret_val)
{
    // the load must observe the last value logged for this location
    if (ret_val == addrManager->getLoggedValue(loc))
        return;

    // mismatch: report it and terminate the test with an error dump
    Addr addr = addrManager->getAddress(loc);
    std::stringstream ss;
    ss << threadName << ": Loaded value is not consistent with "
       << "the last stored value\n"
       << "\tGpuThread " << threadId << "\n"
       << "\tEpisode " << curEpisode->getEpisodeId() << "\n"
       << "\tLane ID " << lane << "\n"
       << "\tAddress " << printAddress(addr) << "\n"
       << "\tLoaded value " << ret_val << "\n"
       << "\tLast writer " << addrManager->printLastWriter(loc) << "\n";

    // print out basic info
    warn("%s\n", ss.str());

    // TODO add more detailed info

    // dump all error info and exit the simulation
    tester->dumpErrorLog(ss);
}
bool
GpuThread::checkDRF(Location atomic_loc, Location loc, bool isStore) const
{
    // with no active episode there is nothing to conflict with
    if (curEpisode == nullptr || !curEpisode->isEpsActive())
        return true;

    // delegate the race check to the current episode across all lanes
    return curEpisode->checkDRF(atomic_loc, loc, isStore, numLanes);
}
void
GpuThread::checkDeadlock()
{
if ((curCycle() - lastActiveCycle) > deadlockThreshold) {
// deadlock detected
std::stringstream ss;
ss << threadName << ": Deadlock detected\n"
<< "\tLast active cycle: " << lastActiveCycle << "\n"
<< "\tCurrent cycle: " << curCycle() << "\n"
<< "\tDeadlock threshold: " << deadlockThreshold << "\n";
// print out basic info
warn("%s\n", ss.str());
// dump all error info and exit the simulation
tester->dumpErrorLog(ss);
} else if (!tester->checkExit()) {
// schedule a future deadlock check event
assert(!deadlockCheckEvent.scheduled());
schedule(deadlockCheckEvent,
deadlockThreshold * clockPeriod() + curTick());
}
}
void
GpuThread::printOutstandingReqs(const OutstandingReqTable& table,
std::stringstream& ss) const
{
Cycles cur_cycle = curCycle();
for (const auto& m : table) {
for (const auto& req : m.second) {
ss << "\t\t\tAddr " << printAddress(m.first)
<< ": delta (curCycle - issueCycle) = "
<< (cur_cycle - req.issueCycle) << std::endl;
}
}
}
void
GpuThread::printAllOutstandingReqs(std::stringstream& ss) const
{
    // dump every request this thread still waits on, grouped by class,
    // followed by the count of pending fences
    ss << "\t\tOutstanding Loads:\n";
    printOutstandingReqs(outstandingLoads, ss);

    ss << "\t\tOutstanding Stores:\n";
    printOutstandingReqs(outstandingStores, ss);

    ss << "\t\tOutstanding Atomics:\n";
    printOutstandingReqs(outstandingAtomics, ss);

    ss << "\t\tNumber of outstanding acquires & releases: "
       << pendingFenceCount << std::endl;
}

View File

@@ -0,0 +1,199 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* GPU thread issues requests to and receives responses from Ruby memory
*/
#ifndef CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_
#define CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
#include "cpu/testers/gpu_ruby_test/episode.hh"
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "sim/clocked_object.hh"
// Abstract base for one tester thread (CPU thread or GPU wavefront). It
// drives a stream of Episodes against the Ruby memory ports and tracks all
// in-flight requests so responses can be validated and deadlock detected.
class GpuThread : public ClockedObject
{
  public:
    typedef GpuThreadParams Params;
    GpuThread(const Params &p);
    virtual ~GpuThread();

    typedef AddressManager::Location Location;
    typedef AddressManager::Value Value;

    // main tick: issue the next ready action or retire the episode
    void wakeup();
    // schedule wakeup() for the next cycle (must not already be pending)
    void scheduleWakeup();
    // report a deadlock if this thread has been idle past the threshold
    void checkDeadlock();
    // arm the first deadlock check; it re-arms itself afterwards
    void scheduleDeadlockCheckEvent();

    // NOTE(review): the gpu_thread.cc definition names the last two
    // parameters in the opposite order (_scalarPort, _sqcPort). Since the
    // compiler matches parameters only by type, positional callers would
    // silently swap the two ports — confirm call sites and unify.
    void attachGpuThreadToPorts(ProtocolTester *_tester,
                                ProtocolTester::SeqPort *_port,
                                ProtocolTester::SeqPort *_sqcPort = nullptr,
                                ProtocolTester::SeqPort *_scalarPort = nullptr);

    const std::string& getName() const { return threadName; }

    // response handler
    // must be implemented by a child class
    virtual void hitCallback(PacketPtr pkt) = 0;

    int getGpuThreadId() const { return threadId; }
    int getNumLanes() const { return numLanes; }
    // check if the input location would satisfy DRF constraint
    bool checkDRF(Location atomic_loc, Location loc, bool isStore) const;

    void printAllOutstandingReqs(std::stringstream& ss) const;

  protected:
    // self-wakeup event driving wakeup()
    class GpuThreadEvent : public Event
    {
      private:
        GpuThread* thread;
        std::string desc;

      public:
        GpuThreadEvent(GpuThread* _thread, std::string _description)
            : Event(CPU_Tick_Pri), thread(_thread), desc(_description)
        {}
        void setDesc(std::string _description) { desc = _description; }
        void process() { thread->wakeup(); }
        // NOTE(review): non-const here, unlike DeadlockCheckEvent::name()
        // below — confirm whether it actually overrides Event::name()
        const std::string name() { return desc; }
    };

    GpuThreadEvent threadEvent;

    // periodic self-rescheduling deadlock probe (see checkDeadlock)
    class DeadlockCheckEvent : public Event
    {
      private:
        GpuThread* thread;

      public:
        DeadlockCheckEvent(GpuThread* _thread)
            : Event(CPU_Tick_Pri), thread(_thread)
        {}
        void process() { thread->checkDeadlock(); }
        const std::string name() const { return "Tester deadlock check"; }
    };

    DeadlockCheckEvent deadlockCheckEvent;

    // bookkeeping for one request awaiting its response
    struct OutstandingReq
    {
        int lane;           // issuing lane within the thread
        Location origLoc;   // tester location the request targeted
        Value storedValue;  // value written (stores only)
        Cycles issueCycle;  // when the request was issued

        OutstandingReq(int _lane, Location _loc, Value _val, Cycles _cycle)
            : lane(_lane), origLoc(_loc), storedValue(_val), issueCycle(_cycle)
        {}

        ~OutstandingReq()
        {}
    };

    // the unique global id of this thread
    int threadId;
    // width of this thread (1 for cpu thread & wf size for gpu wavefront)
    int numLanes;
    // thread name
    std::string threadName;
    // pointer to the main tester
    ProtocolTester *tester;
    // pointer to the address manager
    AddressManager *addrManager;
    ProtocolTester::SeqPort *port;       // main data port (GPU-vector data)
    ProtocolTester::SeqPort *scalarPort; // nullptr for CPU
    ProtocolTester::SeqPort *sqcPort;    // nullptr for CPU
    // a list of issued episodes sorted by time
    // the last episode in the list is the current episode
    typedef std::vector<Episode*> EpisodeHistory;
    EpisodeHistory episodeHistory;
    // pointer to the current episode
    Episode *curEpisode;
    // pointer to the current action
    const Episode::Action *curAction;
    // number of outstanding requests that are waiting for their responses
    int pendingLdStCount;
    int pendingFenceCount;
    int pendingAtomicCount;
    // last cycle when there is an event in this thread
    Cycles lastActiveCycle;
    Cycles deadlockThreshold;
    // a per-address list of outstanding requests
    typedef std::vector<OutstandingReq> OutstandingReqList;
    typedef std::unordered_map<Addr, OutstandingReqList> OutstandingReqTable;
    OutstandingReqTable outstandingLoads;
    OutstandingReqTable outstandingStores;
    OutstandingReqTable outstandingAtomics;

    void issueNewEpisode();
    // check if the next action in the current episode satisfies all wait_cnt
    // constraints and is ready to issue
    bool isNextActionReady();
    void issueNextAction();

    // issue Ops to Ruby memory
    // must be implemented by a child class
    virtual void issueLoadOps() = 0;
    virtual void issueStoreOps() = 0;
    virtual void issueAtomicOps() = 0;
    virtual void issueAcquireOp() = 0;
    virtual void issueReleaseOp() = 0;

    // add an outstanding request to its corresponding table
    void addOutstandingReqs(OutstandingReqTable& req_table, Addr addr,
                            int lane, Location loc,
                            Value stored_val = AddressManager::INVALID_VALUE);

    // pop an outstanding request from the input table
    OutstandingReq popOutstandingReq(OutstandingReqTable& req_table,
                                     Addr address);

    // validate all atomic responses
    void validateAtomicResp(Location loc, int lane, Value ret_val);
    // validate all Load responses
    void validateLoadResp(Location loc, int lane, Value ret_val);
    void printOutstandingReqs(const OutstandingReqTable& table,
                              std::stringstream& ss) const;
};
#endif /* CPU_TESTERS_PROTOCOL_TESTER_GPU_THREAD_HH_ */

View File

@@ -0,0 +1,377 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"

#include <cstring>

#include "debug/ProtocolTest.hh"
GpuWavefront::GpuWavefront(const Params &p)
    : GpuThread(p), cuId(p.cu_id)
{
    // give this wavefront a descriptive name for logs and error dumps
    std::string name = "GpuWavefront(GpuThread ID = ";
    name += std::to_string(threadId);
    name += ", CU ID = ";
    name += std::to_string(cuId);
    name += ")";
    threadName = name;

    threadEvent.setDesc("GpuWavefront tick");
}
// nothing wavefront-specific to clean up; the base class handles the
// episode history
GpuWavefront::~GpuWavefront() = default;
// standard gem5 SimObject factory: build a wavefront from its params
GpuWavefront*
GpuWavefrontParams::create() const
{
    return new GpuWavefront(*this);
}
void
GpuWavefront::issueLoadOps()
{
    // Issue one timing ReadReq per active lane of the current LOAD action.
    // Each response is later checked against the last logged value for its
    // location (see validateLoadResp via hitCallback).
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::LOAD);
    // we should not have any outstanding fence or atomic op at this point
    assert(pendingFenceCount == 0);
    assert(pendingAtomicCount == 0);

    for (int lane = 0; lane < numLanes; ++lane) {
        Location location = curAction->getLocation(lane);
        assert(location >= AddressManager::INVALID_LOCATION);

        // Make a request if we do not get an INVALID_LOCATION for this lane.
        if (location >= 0) {
            Addr address = addrManager->getAddress(location);

            DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
                    this->getName(), curEpisode->getEpisodeId(),
                    printAddress(address));

            int load_size = sizeof(Value);

            // for now, assert address is 4-byte aligned
            assert(address % load_size == 0);

            auto req = std::make_shared<Request>(address, load_size,
                                                 0, tester->requestorId(),
                                                 0, threadId, nullptr);
            req->setPaddr(address);
            req->setReqInstSeqNum(tester->getActionSeqNum());
            // set protocol-specific flags
            setExtraRequestFlags(req);

            PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
            // dataDynamic hands buffer ownership to the packet
            uint8_t* data = new uint8_t[load_size];
            pkt->dataDynamic(data);
            pkt->senderState = new ProtocolTester::SenderState(this);

            // increment the number of outstanding ld_st requests
            pendingLdStCount++;

            if (!port->sendTimingReq(pkt)) {
                panic("Not expected failed sendTimingReq\n");
            }

            // insert an outstanding load
            addOutstandingReqs(outstandingLoads, address, lane, location);
        }
    }
}
void
GpuWavefront::issueStoreOps()
{
    // Issue one timing WriteReq per active lane of the current STORE
    // action. Each store writes "last logged value + 1" so that a later
    // load can be validated against a single, known expected value.
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::STORE);
    // we should not have any outstanding fence or atomic op at this point
    assert(pendingFenceCount == 0);
    assert(pendingAtomicCount == 0);

    for (int lane = 0; lane < numLanes; ++lane) {
        Location location = curAction->getLocation(lane);
        assert(location >= AddressManager::INVALID_LOCATION);

        // Make a request if we do not get an INVALID_LOCATION for this lane.
        if (location >= 0) {
            // prepare the next value to store
            Value new_value = addrManager->getLoggedValue(location) + 1;
            Addr address = addrManager->getAddress(location);
            // must be aligned with store size
            assert(address % sizeof(Value) == 0);

            DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
                    "Value %d\n", this->getName(),
                    curEpisode->getEpisodeId(), printAddress(address),
                    new_value);

            auto req = std::make_shared<Request>(address, sizeof(Value),
                                                 0, tester->requestorId(), 0,
                                                 threadId, nullptr);
            req->setPaddr(address);
            req->setReqInstSeqNum(tester->getActionSeqNum());
            // set protocol-specific flags
            setExtraRequestFlags(req);

            PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
            // copy the store payload into a packet-owned buffer;
            // std::memcpy replaces the original manual per-byte loop,
            // which also mixed a signed loop index with an unsigned
            // sizeof bound
            uint8_t *writeData = new uint8_t[sizeof(Value)];
            std::memcpy(writeData, &new_value, sizeof(Value));
            pkt->dataDynamic(writeData);
            pkt->senderState = new ProtocolTester::SenderState(this);

            // increment the number of outstanding ld_st requests
            pendingLdStCount++;

            if (!port->sendTimingReq(pkt)) {
                panic("Not expecting a failed sendTimingReq\n");
            }

            // add an outstanding store
            addOutstandingReqs(outstandingStores, address, lane, location,
                               new_value);
        }
    }
}
void
GpuWavefront::issueAtomicOps()
{
    // Issue one timing atomic-increment (SwapReq + AtomicOpInc) per lane
    // of the current ATOMIC action. The returned (pre-increment) value is
    // validated against the AddressManager's log in hitCallback.
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::ATOMIC);
    // we should not have any outstanding ops at this point
    assert(pendingFenceCount == 0);
    assert(pendingLdStCount == 0);
    assert(pendingAtomicCount == 0);

    // we use atomic_inc in the tester; request the old value back
    Request::Flags flags = Request::ATOMIC_RETURN_OP;

    for (int lane = 0; lane < numLanes; ++lane) {
        Location location = curAction->getLocation(lane);
        // unlike loads/stores, every lane of an atomic action has a
        // valid location
        assert(location >= 0);

        Addr address = addrManager->getAddress(location);

        DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n",
                this->getName(), curEpisode->getEpisodeId(),
                printAddress(address));

        // must be aligned with store size
        assert(address % sizeof(Value) == 0);
        AtomicOpFunctor *amo_op = new AtomicOpInc<Value>();
        // the AtomicOpFunctorPtr takes ownership of amo_op
        auto req = std::make_shared<Request>(address, sizeof(Value),
                                             flags, tester->requestorId(),
                                             0, threadId,
                                             AtomicOpFunctorPtr(amo_op));
        req->setPaddr(address);
        req->setReqInstSeqNum(tester->getActionSeqNum());
        // set protocol-specific flags
        setExtraRequestFlags(req);

        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
        // buffer for the returned old value; owned by the packet
        uint8_t* data = new uint8_t[sizeof(Value)];
        pkt->dataDynamic(data);
        pkt->senderState = new ProtocolTester::SenderState(this);

        if (!port->sendTimingReq(pkt)) {
            panic("Not expecting failed sendTimingReq\n");
        }

        // increment the number of outstanding atomic ops
        pendingAtomicCount++;

        // add an outstanding atomic
        addOutstandingReqs(outstandingAtomics, address, lane, location);
    }
}
void
GpuWavefront::issueAcquireOp()
{
    // Issue a single MemSyncReq tagged with Request::ACQUIRE. The fence
    // carries no payload and a dummy address of 0; completion arrives as
    // a MemSyncResp in hitCallback.
    DPRINTF(ProtocolTest, "%s Episode %d: Issuing Acquire\n", this->getName(),
            curEpisode->getEpisodeId());

    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::ACQUIRE);
    // we should not have any outstanding ops at this point
    assert(pendingFenceCount == 0);
    assert(pendingLdStCount == 0);
    assert(pendingAtomicCount == 0);

    auto acq_req = std::make_shared<Request>(0, 0, 0,
                                             tester->requestorId(), 0,
                                             threadId, nullptr);
    acq_req->setPaddr(0);
    acq_req->setReqInstSeqNum(tester->getActionSeqNum());
    acq_req->setFlags(Request::ACQUIRE);
    // set protocol-specific flags
    setExtraRequestFlags(acq_req);

    PacketPtr pkt = new Packet(acq_req, MemCmd::MemSyncReq);
    pkt->senderState = new ProtocolTester::SenderState(this);

    // increment the number of outstanding fence requests
    pendingFenceCount++;

    if (!port->sendTimingReq(pkt)) {
        panic("Not expecting failed sendTimingReq\n");
    }
}
void
GpuWavefront::issueReleaseOp()
{
    DPRINTF(ProtocolTest, "%s Episode %d: Issuing Release\n", this->getName(),
            curEpisode->getEpisodeId());

    // A release fence simply waits for all previous stores to complete. All
    // previous loads and stores were done before this release operation is
    // issued, so issueReleaseOp is just a no-op in this tester.

    // since nothing was sent to memory, the thread can try its next
    // action as soon as the next cycle
    if (!threadEvent.scheduled()) {
        scheduleWakeup();
    }
}
void
GpuWavefront::hitCallback(PacketPtr pkt)
{
    // Response handler for every request this wavefront issued. It
    // validates load/atomic return values, logs completed stores/atomics
    // in the AddressManager, updates the pending counters, and finally
    // wakes the thread so the next action can be considered.
    assert(pkt);
    MemCmd resp_cmd = pkt->cmd;
    // write-completion acks carry no address; use 0 for the debug print
    Addr addr = (resp_cmd == MemCmd::WriteCompleteResp) ? 0 : pkt->getAddr();

    DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - "
            "Addr %s\n", this->getName(),
            curEpisode->getEpisodeId(), resp_cmd.toString(),
            printAddress(addr));

    // whether the transaction is done after this hitCallback
    bool isTransactionDone = true;

    if (resp_cmd == MemCmd::MemSyncResp) {
        // response to a pending fence
        // no validation needed for fence responses
        assert(pendingFenceCount > 0);
        assert(pendingLdStCount == 0);
        assert(pendingAtomicCount == 0);
        pendingFenceCount--;
    } else if (resp_cmd == MemCmd::ReadResp) {
        // response to a pending read
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);
        assert(outstandingLoads.count(addr) > 0);

        // get return data
        Value value = *(pkt->getPtr<Value>());
        OutstandingReq req = popOutstandingReq(outstandingLoads, addr);
        validateLoadResp(req.origLoc, req.lane, value);

        // this Read is done
        pendingLdStCount--;
    } else if (resp_cmd == MemCmd::WriteResp) {
        // response to a pending write
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);

        // no need to validate Write response
        // just pop it from the outstanding req table so that subsequent
        // requests dependent on this write can proceed
        // note that we don't decrement pendingLdStCount here yet since
        // the write is not yet completed in downstream memory. Instead, we
        // decrement the counter when we receive the write completion ack
        assert(outstandingStores.count(addr) > 0);
        OutstandingReq req = popOutstandingReq(outstandingStores, addr);
        assert(req.storedValue != AddressManager::INVALID_VALUE);

        // update log table
        addrManager->updateLogTable(req.origLoc, threadId,
                                    curEpisode->getEpisodeId(),
                                    req.storedValue,
                                    curTick(),
                                    cuId);

        // the transaction is not done yet. Waiting for write completion ack
        // NOTE(review): on this path pkt is deleted below without deleting
        // pkt->senderState — confirm the SenderState is not leaked for
        // writes (the completion ack arrives as a separate packet).
        isTransactionDone = false;
    } else if (resp_cmd == MemCmd::SwapResp) {
        // response to a pending atomic
        assert(pendingAtomicCount > 0);
        assert(pendingLdStCount == 0);
        assert(outstandingAtomics.count(addr) > 0);

        // get return data (the pre-op value of the location)
        Value value = *(pkt->getPtr<Value>());

        // validate atomic op return
        OutstandingReq req = popOutstandingReq(outstandingAtomics, addr);
        validateAtomicResp(req.origLoc, req.lane, value);

        // update log table
        addrManager->updateLogTable(req.origLoc, threadId,
                                    curEpisode->getEpisodeId(), value,
                                    curTick(),
                                    cuId);

        // this Atomic is done
        pendingAtomicCount--;
    } else if (resp_cmd == MemCmd::WriteCompleteResp) {
        // write completion ACK
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);

        // the Write is now done
        pendingLdStCount--;
    } else {
        panic("Unsupported MemCmd response type");
    }

    if (isTransactionDone) {
        // no need to keep senderState and request around
        delete pkt->senderState;
    }

    delete pkt;

    // record the last active cycle to check for deadlock
    lastActiveCycle = curCycle();

    // we may be able to issue an action. Let's check
    if (!threadEvent.scheduled()) {
        scheduleWakeup();
    }
}
// Default hook for tagging requests with protocol-specific flags.
// The base wavefront adds nothing; protocol-specific subclasses override
// this to set whatever extra flags their target protocol requires.
void
GpuWavefront::setExtraRequestFlags(RequestPtr req)
{
    // No extra request flag is set
}

View File

@@ -0,0 +1,68 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_
#define CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_

#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
#include "params/GpuWavefront.hh"
#include "sim/clocked_object.hh"

// A GpuWavefront is a GpuThread that injects GPU-side memory requests
// (loads, stores, atomics, and acquire/release fences) into the tester's
// CU data ports and validates the responses via hitCallback.
class GpuWavefront : public GpuThread
{
  public:
    typedef GpuWavefrontParams Params;
    GpuWavefront(const Params &p);
    virtual ~GpuWavefront();

    typedef AddressManager::Location Location;
    typedef AddressManager::Value Value;

    // process a memory response routed back from the tester's port
    virtual void hitCallback(PacketPtr pkt);

  protected:
    void issueLoadOps();
    void issueStoreOps();
    void issueAtomicOps();
    // acquire and release ops are protocol-specific, so their issue functions
    // may be redefined by a child class of GpuWavefront
    virtual void issueAcquireOp();
    virtual void issueReleaseOp();
    // set extra request flags that are specific to a target protocol;
    // the base implementation sets none
    virtual void setExtraRequestFlags(RequestPtr req);

  protected:
    int cuId; // compute unit associated with this wavefront
};

#endif /* CPU_TESTERS_PROTOCOL_TESTER_GPU_WAVEFRONT_HH_ */

View File

@@ -0,0 +1,312 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
#include <algorithm>
#include <ctime>
#include <fstream>
#include <random>
#include "cpu/testers/gpu_ruby_test/cpu_thread.hh"
#include "cpu/testers/gpu_ruby_test/gpu_thread.hh"
#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
#include "debug/ProtocolTest.hh"
#include "mem/request.hh"
#include "sim/sim_exit.hh"
#include "sim/system.hh"
// Construct the tester: create one request port per connected CPU/CU
// interface, build the address manager that generates DRF request streams
// and validates responses, seed the RNG, and open the log file with a
// summary of the test configuration.
ProtocolTester::ProtocolTester(const Params &p)
      : ClockedObject(p),
        _requestorId(p.system->getRequestorId(this)),
        numCpuPorts(p.port_cpu_ports_connection_count),
        numVectorPorts(p.port_cu_vector_ports_connection_count),
        numSqcPorts(p.port_cu_sqc_ports_connection_count),
        numScalarPorts(p.port_cu_scalar_ports_connection_count),
        numCusPerSqc(p.cus_per_sqc),
        numCusPerScalar(p.cus_per_scalar),
        numWfsPerCu(p.wavefronts_per_cu),
        numWisPerWf(p.workitems_per_wavefront),
        numAtomicLocs(p.num_atomic_locations),
        numNormalLocsPerAtomic(p.num_normal_locs_per_atomic),
        episodeLength(p.episode_length),
        maxNumEpisodes(p.max_num_episodes),
        debugTester(p.debug_tester),
        cpuThreads(p.cpu_threads),
        wfs(p.wavefronts)
{
    int idx = 0;  // global port index, unique across all port vectors

    numCpus = numCpuPorts;     // 1 cpu port per CPU
    numCus = numVectorPorts;   // 1 vector port per CU

    // create all physical cpu's data ports
    for (int i = 0; i < numCpuPorts; ++i) {
        DPRINTF(ProtocolTest, "Creating %s\n",
                csprintf("%s-cpuPort%d", name(), i));
        cpuPorts.push_back(new SeqPort(csprintf("%s-cpuPort%d", name(), i),
                                       this, i, idx));
        idx++;
    }

    // create all physical gpu's data ports
    for (int i = 0; i < numVectorPorts; ++i) {
        DPRINTF(ProtocolTest, "Creating %s\n",
                csprintf("%s-cuVectorPort%d", name(), i));
        cuVectorPorts.push_back(new SeqPort(csprintf("%s-cuVectorPort%d",
                                                     name(), i),
                                            this, i, idx));
        idx++;
    }

    // scalar ports may be shared by multiple CUs (see numCusPerScalar)
    for (int i = 0; i < numScalarPorts; ++i) {
        DPRINTF(ProtocolTest, "Creating %s\n",
                csprintf("%s-cuScalarPort%d", name(), i));
        cuScalarPorts.push_back(new SeqPort(csprintf("%s-cuScalarPort%d",
                                                     name(), i),
                                            this, i, idx));
        idx++;
    }

    // SQC (instruction cache) ports may also be shared (see numCusPerSqc)
    for (int i = 0; i < numSqcPorts; ++i) {
        DPRINTF(ProtocolTest, "Creating %s\n",
                csprintf("%s-cuSqcPort%d", name(), i));
        cuSqcPorts.push_back(new SeqPort(csprintf("%s-cuSqcPort%d",
                                                  name(), i),
                                         this, i, idx));
        idx++;
    }

    // create an address manager
    addrManager = new AddressManager(numAtomicLocs,
                                     numNormalLocsPerAtomic);
    nextEpisodeId = 0;

    if (!debugTester)
        warn("Data race check is not enabled\n");

    sentExitSignal = false;

    // set random seed number; 0 means "seed from the current time"
    if (p.random_seed != 0) {
        srand(p.random_seed);
    } else {
        srand(time(NULL));
    }

    actionCount = 0;

    // create a new log file
    logFile = simout.create(p.log_file);
    assert(logFile);

    // print test configs
    std::stringstream ss;
    ss << "GPU Ruby test's configurations" << std::endl
       << "\tNumber of CPUs: " << numCpus << std::endl
       << "\tNumber of CUs: " << numCus << std::endl
       << "\tNumber of wavefronts per CU: " << numWfsPerCu << std::endl
       << "\tWavefront size: " << numWisPerWf << std::endl
       << "\tNumber of atomic locations: " << numAtomicLocs << std::endl
       << "\tNumber of non-atomic locations: "
       << numNormalLocsPerAtomic * numAtomicLocs << std::endl
       << "\tEpisode length: " << episodeLength << std::endl
       << "\tTest length (max number of episodes): " << maxNumEpisodes
       << std::endl
       << "\tRandom seed: " << p.random_seed
       << std::endl;

    ccprintf(*(logFile->stream()), "%s", ss.str());
    logFile->stream()->flush();
}
// Release all heap-allocated ports and the address manager, then close
// the tester's log file.
ProtocolTester::~ProtocolTester()
{
    for (auto* port : cpuPorts)
        delete port;
    for (auto* port : cuVectorPorts)
        delete port;
    for (auto* port : cuScalarPorts)
        delete port;
    for (auto* port : cuSqcPorts)
        delete port;

    delete addrManager;

    // close the log file
    simout.close(logFile);
}
void
ProtocolTester::init()
{
DPRINTF(ProtocolTest, "Attach threads to ports\n");
// connect cpu threads to cpu's ports
for (int cpu_id = 0; cpu_id < numCpus; ++cpu_id) {
cpuThreads[cpu_id]->attachGpuThreadToPorts(this,
static_cast<SeqPort*>(cpuPorts[cpu_id]));
cpuThreads[cpu_id]->scheduleWakeup();
cpuThreads[cpu_id]->scheduleDeadlockCheckEvent();
}
// connect gpu wavefronts to gpu's ports
int wfId = 0;
int vectorPortId = 0;
int sqcPortId = 0;
int scalarPortId = 0;
for (int cu_id = 0; cu_id < numCus; ++cu_id) {
vectorPortId = cu_id;
sqcPortId = cu_id/numCusPerSqc;
scalarPortId = cu_id/numCusPerScalar;
for (int i = 0; i < numWfsPerCu; ++i) {
wfId = cu_id * numWfsPerCu + i;
wfs[wfId]->attachGpuThreadToPorts(this,
static_cast<SeqPort*>(cuVectorPorts[vectorPortId]),
static_cast<SeqPort*>(cuSqcPorts[sqcPortId]),
static_cast<SeqPort*>(cuScalarPorts[scalarPortId]));
wfs[wfId]->scheduleWakeup();
wfs[wfId]->scheduleDeadlockCheckEvent();
}
}
}
// Resolve a named port of this tester.
//
// @param if_name one of "cpu_ports", "cu_vector_ports", "cu_sqc_ports",
//                "cu_scalar_ports"; anything else is forwarded to the
//                base class.
// @param idx     index within the named port vector.
// @return        reference to the requested port.
//
// Bug fix: the bounds checks previously used `idx > numXPorts`, which let
// `idx == numXPorts` slip through and index one element past the end of
// the corresponding port vector. They now use `>=`. The unreachable
// trailing `assert(false)` (every branch already returns) was removed.
Port&
ProtocolTester::getPort(const std::string &if_name, PortID idx)
{
    if (if_name != "cpu_ports" && if_name != "cu_vector_ports" &&
        if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports") {
        // pass along to super class
        return ClockedObject::getPort(if_name, idx);
    } else {
        if (if_name == "cpu_ports") {
            if (idx >= numCpuPorts)
                panic("ProtocolTester: unknown cpu port %d\n", idx);
            return *cpuPorts[idx];
        } else if (if_name == "cu_vector_ports") {
            if (idx >= numVectorPorts)
                panic("ProtocolTester: unknown cu vect port %d\n", idx);
            return *cuVectorPorts[idx];
        } else if (if_name == "cu_sqc_ports") {
            if (idx >= numSqcPorts)
                panic("ProtocolTester: unknown cu sqc port %d\n", idx);
            return *cuSqcPorts[idx];
        } else {
            assert(if_name == "cu_scalar_ports");
            if (idx >= numScalarPorts)
                panic("ProtocolTester: unknown cu scal port %d\n", idx);
            return *cuScalarPorts[idx];
        }
    }
}
// Return true once all requested episodes have been issued. On the first
// such call, report the episode count and schedule the simulation exit.
bool
ProtocolTester::checkExit()
{
    // still below the episode budget: keep going
    if (nextEpisodeId <= maxNumEpisodes)
        return false;

    if (!sentExitSignal) {
        // all done
        inform("Total completed episodes: %d\n", nextEpisodeId - 1);
        exitSimLoop("GPU Ruby Tester: Passed!");
        sentExitSignal = true;
    }
    return true;
}
// Check whether picking location `loc` (guarded by `atomic_loc`) for a
// load or store would keep all active episodes data-race free. Always
// true when the tester is not in debug mode (no race checking).
bool
ProtocolTester::checkDRF(Location atomic_loc,
                         Location loc, bool isStore) const
{
    if (!debugTester)
        return true;

    // a single conflicting active episode makes the pick unsafe
    for (const GpuThread* wavefront : wfs) {
        if (!wavefront->checkDRF(atomic_loc, loc, isStore))
            return false;
    }
    for (const GpuThread* cpu_thread : cpuThreads) {
        if (!cpu_thread->checkDRF(atomic_loc, loc, isStore))
            return false;
    }

    return true;
}
void
ProtocolTester::dumpErrorLog(std::stringstream& ss)
{
if (!sentExitSignal) {
// go through all threads and dump their outstanding requests
for (auto t : cpuThreads) {
t->printAllOutstandingReqs(ss);
}
for (auto t : wfs) {
t->printAllOutstandingReqs(ss);
}
// dump error log into a file
assert(logFile);
ccprintf(*(logFile->stream()), "%s", ss.str());
logFile->stream()->flush();
sentExitSignal = true;
// terminate the simulation
panic("GPU Ruby Tester: Failed!\n");
}
}
// Route a timing response back to the thread that issued the request,
// which is recorded in the packet's sender state.
bool
ProtocolTester::SeqPort::recvTimingResp(PacketPtr pkt)
{
    auto* sender_state =
        safe_cast<ProtocolTester::SenderState*>(pkt->senderState);
    sender_state->th->hitCallback(pkt);
    return true;
}
// Factory hook called by the Python configuration system to instantiate
// a ProtocolTester from its generated parameter struct.
ProtocolTester*
ProtocolTesterParams::create() const
{
    return new ProtocolTester(*this);
}

View File

@@ -0,0 +1,178 @@
/*
* Copyright (c) 2017-2020 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_
#define CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_
/*
* The tester includes the main ProtocolTester that manages all ports to the
* memory system.
* GpuThreads are mapped to certain data port(s)
*
* GpuThreads inject memory requests through their data ports.
* The tester receives and validates responses from the memory.
*
* Main components
* - AddressManager: generate DRF request streams &
* validate data response against an internal log_table
* - Episode: a sequence of requests
* - Thread: either GPU wavefront or CPU thread
*/
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "base/types.hh"
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
#include "mem/packet.hh"
#include "mem/ruby/system/RubyPort.hh"
#include "params/ProtocolTester.hh"
class GpuThread;
class CpuThread;
class GpuWavefront;
// Top-level SimObject of the GPU Ruby tester. It owns all request ports
// into the Ruby memory system, hands them out to CPU threads and GPU
// wavefronts, and centralizes episode numbering, data-race-freedom checks,
// and error-log dumping.
class ProtocolTester : public ClockedObject
{
  public:
    // Request port that forwards timing responses back to the issuing
    // thread. Retries are not expected (threads throttle themselves),
    // so recvReqRetry panics.
    class SeqPort : public RequestPort
    {
      public:
        SeqPort(const std::string &_name, ProtocolTester *_tester, PortID _id,
                PortID _index)
            : RequestPort(_name, _tester, _id)
        {}

      protected:
        virtual bool recvTimingResp(PacketPtr pkt);
        virtual void recvReqRetry()
        { panic("%s does not expect a retry\n", name()); }
    };

    // Attached to every request packet so that the response can be routed
    // back to the GpuThread that issued the request.
    struct SenderState : public Packet::SenderState
    {
        GpuThread* th;  // issuing thread; never null (asserted below)

        SenderState(GpuThread* _th)
        {
            assert(_th);
            th = _th;
        }

        ~SenderState()
        {}
    };

  public:
    typedef ProtocolTesterParams Params;
    ProtocolTester(const Params &p);
    ~ProtocolTester();

    typedef AddressManager::Location Location;
    typedef AddressManager::Value Value;

    // attach threads to ports and schedule their initial events
    void init();
    // requestor ID used for all memory requests issued by this tester
    RequestorID requestorId() { return _requestorId; };
    Port& getPort(const std::string &if_name,
                  PortID idx=InvalidPortID) override;

    int getEpisodeLength() const { return episodeLength; }
    // return pointer to the address manager
    AddressManager* getAddressManager() const { return addrManager; }
    // return true if the tester should stop issuing new episodes
    bool checkExit();
    // verify if a location to be picked for LD/ST will satisfy
    // data race free requirement
    bool checkDRF(Location atomic_loc, Location loc, bool isStore) const;
    // return the next episode id and increment it
    int getNextEpisodeID() { return nextEpisodeId++; }
    // get action sequence number
    int getActionSeqNum() { return actionCount++; }
    // dump error log into a file and exit the simulation
    void dumpErrorLog(std::stringstream& ss);

  private:
    RequestorID _requestorId;
    // list of parameters taken from python scripts
    int numCpuPorts;
    int numVectorPorts;
    int numSqcPorts;
    int numScalarPorts;
    int numCusPerSqc;      // CUs sharing one SQC (inst cache) port
    int numCusPerScalar;   // CUs sharing one scalar cache port
    int numWfsPerCu;
    int numWisPerWf;
    // parameters controlling the address range that the tester can access
    int numAtomicLocs;
    int numNormalLocsPerAtomic;
    // the number of actions in an episode (episodeLength +- random number)
    int episodeLength;
    // the maximum number of episodes to be completed by this tester
    int maxNumEpisodes;
    // are we debugging the tester (enables the data-race-freedom checks)
    bool debugTester;
    // all available requestor ports connected to Ruby
    std::vector<RequestPort*> cpuPorts;      // cpu data ports
    std::vector<RequestPort*> cuVectorPorts; // ports to GPU vector cache
    std::vector<RequestPort*> cuSqcPorts;    // ports to GPU inst cache
    std::vector<RequestPort*> cuScalarPorts; // ports to GPU scalar cache
    // all CPU and GPU threads
    std::vector<CpuThread*> cpuThreads;
    std::vector<GpuWavefront*> wfs;
    // address manager that (1) generates DRF sequences of requests,
    // (2) manages an internal log table and
    // (3) validate response data
    AddressManager* addrManager;
    // number of CPUs and CUs
    int numCpus;
    int numCus;
    // unique id of the next episode
    int nextEpisodeId;
    // global action count. Overflow is fine. It's used to uniquely identify
    // per-wave & per-instruction memory requests in the coalescer
    int actionCount;
    // if an exit signal was already sent
    bool sentExitSignal;
    OutputStream* logFile;  // tester config + error report output
};
#endif /* CPU_TESTERS_PROTOCOL_TESTER_PROTOCOL_TESTER_HH_ */