arch, gpu-compute: Remove HSAIL related files

Change-Id: Iefba0a38d62da7598bbfe3fe6ff46454d35144b1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28410
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Tony Gutierrez
2018-05-01 17:34:29 -04:00
committed by Anthony Gutierrez
parent d2beb1ffc8
commit 9d51dec937
45 changed files with 6 additions and 12854 deletions

View File

@@ -29,7 +29,6 @@ arch-arm:
Andreas Sandberg <andreas.sandberg@arm.com>
Giacomo Travaglini <giacomo.travaglini@arm.com>
arch-gcn3:
arch-hsail:
Tony Gutierrez <anthony.gutierrez@amd.com>
arch-mips:
arch-power:

View File

@@ -989,7 +989,7 @@ all_gpu_isa_list.sort()
sticky_vars.AddVariables(
EnumVariable('TARGET_ISA', 'Target ISA', 'null', all_isa_list),
EnumVariable('TARGET_GPU_ISA', 'Target GPU ISA', 'hsail', all_gpu_isa_list),
EnumVariable('TARGET_GPU_ISA', 'Target GPU ISA', 'gcn3', all_gpu_isa_list),
ListVariable('CPU_MODELS', 'CPU models',
sorted(n for n,m in CpuModel.dict.items() if m.default),
sorted(CpuModel.dict.keys())),

View File

@@ -1,5 +0,0 @@
PROTOCOL = 'GPU_RfO'
TARGET_ISA = 'x86'
TARGET_GPU_ISA = 'hsail'
BUILD_GPU = True
CPU_MODELS = 'AtomicSimpleCPU,O3CPU,TimingSimpleCPU'

View File

@@ -37,6 +37,9 @@ import sys
Import('*')
if not env['BUILD_GPU']:
Return()
if env['TARGET_GPU_ISA'] == 'gcn3':
Source('decoder.cc')
Source('insts/gpu_static_inst.cc')

View File

@@ -1,67 +0,0 @@
// University of Illinois/NCSA
// Open Source License
//
// Copyright (c) 2013, Advanced Micro Devices, Inc.
// All rights reserved.
//
// Developed by:
//
// HSA Team
//
// Advanced Micro Devices, Inc
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal with
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of the LLVM Team, University of Illinois at
// Urbana-Champaign, nor the names of its contributors may be used to
// endorse or promote products derived from this Software without specific
// prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
// SOFTWARE.
#ifndef INTERNAL_BRIG_H
#define INTERNAL_BRIG_H

#include <stdint.h>

// Wrap the auto-generated BRIG definitions in a namespace so that their
// C-style names do not leak into the global namespace.  Note the include
// is deliberately *inside* the namespace.
namespace Brig {
#include "Brig_new.hpp"

// These typedefs provide some backward compatibility with earlier versions
// of Brig.h, reducing the number of code changes. The distinct names also
// increase legibility by showing the code's intent.
typedef BrigBase BrigDirective;
typedef BrigBase BrigOperand;

enum BrigMemoryFenceSegments { // for internal use only
    // The '//.mnemo*' lines below look like annotations consumed by a
    // BRIG tooling generator (mnemonic tables) -- presumably HSAIL
    // tooling; they are kept verbatim. TODO confirm which tool reads them.
    //.mnemo={ s/^BRIG_MEMORY_FENCE_SEGMENT_//;lc }
    //.mnemo_token=_EMMemoryFenceSegments
    //.mnemo_context=EInstModifierInstFenceContext
    BRIG_MEMORY_FENCE_SEGMENT_GLOBAL = 0,
    BRIG_MEMORY_FENCE_SEGMENT_GROUP = 1,
    BRIG_MEMORY_FENCE_SEGMENT_IMAGE = 2,
    BRIG_MEMORY_FENCE_SEGMENT_LAST = 3 //.skip
};
}

#endif // defined(INTERNAL_BRIG_H)

File diff suppressed because it is too large Load Diff

View File

@@ -1,53 +0,0 @@
# -*- mode:python -*-
# Copyright (c) 2015 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: Anthony Gutierrez
#
Import('*')

# GPU models are optional; skip this directory entirely when not building one.
if not env['BUILD_GPU']:
    Return()

# Only compile HSAIL support when it is the selected GPU ISA.
if env['TARGET_GPU_ISA'] == 'hsail':
    # gen.py emits the generated declarations, decoder, and exec
    # functions; SCons runs it as '<gen.py> <gen_decl.hh> <gpu_decoder.cc>
    # <gen_exec.cc>' ($SOURCE followed by $TARGETS).
    env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'],
                'gen.py', '$SOURCE $TARGETS')

    # Generated sources (gpu_decoder.cc, gen_exec.cc) are compiled
    # alongside the hand-written instruction/operand implementations.
    Source('gpu_decoder.cc')
    Source('insts/branch.cc')
    Source('insts/gen_exec.cc')
    Source('insts/gpu_static_inst.cc')
    Source('insts/main.cc')
    Source('insts/pseudo_inst.cc')
    Source('insts/mem.cc')
    Source('operand.cc')

View File

@@ -1,40 +0,0 @@
# -*- mode:python -*-
#
# Copyright (c) 2015 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: Anthony Gutierrez
#
Import('*')
all_gpu_isa_list.append('hsail')

View File

@@ -1,912 +0,0 @@
#!/usr/bin/env python
# Copyright (c) 2015 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: Steve Reinhardt
#
from __future__ import print_function
import sys, re
from m5.util import code_formatter
# Invoked by the SConscript as: gen.py <gen_decl.hh> <gpu_decoder.cc>
# <gen_exec.cc>; argv[0] is the script itself, so exactly 3 file-name
# arguments are required.
if len(sys.argv) != 4:
    print("Error: need 3 args (file names)", file=sys.stderr)
    # Exit non-zero so the build system treats a bad invocation as a
    # failure (previously this exited 0, silently "succeeding" without
    # producing any output files).
    sys.exit(1)
header_code = code_formatter()
decoder_code = code_formatter()
exec_code = code_formatter()
###############
#
# Generate file prologs (includes etc.)
#
###############
header_code('''
#include "arch/hsail/insts/decl.hh"
#include "base/bitfield.hh"
#include "gpu-compute/hsail_code.hh"
#include "gpu-compute/wavefront.hh"
namespace HsailISA
{
''')
header_code.indent()
decoder_code('''
#include "arch/hsail/gpu_decoder.hh"
#include "arch/hsail/insts/branch.hh"
#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gen_decl.hh"
#include "arch/hsail/insts/mem.hh"
#include "arch/hsail/insts/mem_impl.hh"
#include "gpu-compute/brig_object.hh"
namespace HsailISA
{
std::vector<GPUStaticInst*> Decoder::decodedInsts;
GPUStaticInst*
Decoder::decode(MachInst machInst)
{
using namespace Brig;
const BrigInstBase *ib = machInst.brigInstBase;
const BrigObject *obj = machInst.brigObj;
switch(ib->opcode) {
''')
decoder_code.indent()
decoder_code.indent()
exec_code('''
#include "arch/hsail/insts/gen_decl.hh"
#include "base/intmath.hh"
namespace HsailISA
{
''')
exec_code.indent()
###############
#
# Define code templates for class declarations (for header file)
#
###############
# Basic header template for an instruction stub.
header_template_stub = '''
class $class_name : public $base_class
{
public:
typedef $base_class Base;
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "$opcode")
{
}
void execute(GPUDynInstPtr gpuDynInst);
};
'''
# Basic header template for an instruction with no template parameters.
header_template_nodt = '''
class $class_name : public $base_class
{
public:
typedef $base_class Base;
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "$opcode")
{
}
void execute(GPUDynInstPtr gpuDynInst);
};
'''
# Basic header template for an instruction with a single DataType
# template parameter.
header_template_1dt = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
public:
typedef $base_class<DataType> Base;
typedef typename DataType::CType CType;
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "$opcode")
{
}
void execute(GPUDynInstPtr gpuDynInst);
};
'''
header_template_1dt_noexec = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
public:
typedef $base_class<DataType> Base;
typedef typename DataType::CType CType;
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "$opcode")
{
}
};
'''
# Same as header_template_1dt, except the base class has a second
# template parameter NumSrcOperands to allow a variable number of
# source operands. Note that since this is implemented with an array,
# it only works for instructions where all sources are of the same
# type (like most arithmetics).
header_template_1dt_varsrcs = '''
template<typename DataType>
class $class_name : public $base_class<DataType, $num_srcs>
{
public:
typedef $base_class<DataType, $num_srcs> Base;
typedef typename DataType::CType CType;
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "$opcode")
{
}
void execute(GPUDynInstPtr gpuDynInst);
};
'''
# Header template for instruction with two DataType template
# parameters, one for the dest and one for the source. This is used
# by compare and convert.
header_template_2dt = '''
template<typename DestDataType, class SrcDataType>
class $class_name : public $base_class<DestDataType, SrcDataType>
{
public:
typedef $base_class<DestDataType, SrcDataType> Base;
typedef typename DestDataType::CType DestCType;
typedef typename SrcDataType::CType SrcCType;
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "$opcode")
{
}
void execute(GPUDynInstPtr gpuDynInst);
};
'''
header_templates = {
'ArithInst': header_template_1dt_varsrcs,
'CmovInst': header_template_1dt,
'ClassInst': header_template_1dt,
'ShiftInst': header_template_1dt,
'ExtractInsertInst': header_template_1dt,
'CmpInst': header_template_2dt,
'CvtInst': header_template_2dt,
'PopcountInst': header_template_2dt,
'LdInst': '',
'StInst': '',
'SpecialInstNoSrc': header_template_nodt,
'SpecialInst1Src': header_template_nodt,
'SpecialInstNoSrcNoDest': '',
'Stub': header_template_stub,
}
###############
#
# Define code templates for exec functions
#
###############
# exec function body
exec_template_stub = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble());
}
'''
exec_template_nodt_nosrc = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
typedef Base::DestCType DestCType;
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
DestCType dest_val = $expr;
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_template_nodt_1src = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
typedef Base::DestCType DestCType;
typedef Base::SrcCType SrcCType;
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
DestCType dest_val = $expr;
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_template_1dt_varsrcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
if ($dest_is_src_flag) {
dest_val = this->dest.template get<CType>(w, lane);
}
CType src_val[$num_srcs];
for (int i = 0; i < $num_srcs; ++i) {
src_val[i] = this->src[i].template get<CType>(w, lane);
}
dest_val = (CType)($expr);
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_template_1dt_3srcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
typedef typename Base::Src0CType Src0T;
typedef typename Base::Src1CType Src1T;
typedef typename Base::Src2CType Src2T;
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
if ($dest_is_src_flag) {
dest_val = this->dest.template get<CType>(w, lane);
}
Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
Src2T src_val2 = this->src2.template get<Src2T>(w, lane);
dest_val = $expr;
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_template_1dt_2src_1dest = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
typedef typename Base::DestCType DestT;
typedef CType Src0T;
typedef typename Base::Src1CType Src1T;
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
DestT dest_val;
if ($dest_is_src_flag) {
dest_val = this->dest.template get<DestT>(w, lane);
}
Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
dest_val = $expr;
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_template_shift = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
if ($dest_is_src_flag) {
dest_val = this->dest.template get<CType>(w, lane);
}
CType src_val0 = this->src0.template get<CType>(w, lane);
uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);
dest_val = $expr;
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_template_2dt = '''
template<typename DestDataType, class SrcDataType>
void
$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
DestCType dest_val;
SrcCType src_val[$num_srcs];
for (int i = 0; i < $num_srcs; ++i) {
src_val[i] = this->src[i].template get<SrcCType>(w, lane);
}
dest_val = $expr;
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_templates = {
'ArithInst': exec_template_1dt_varsrcs,
'CmovInst': exec_template_1dt_3srcs,
'ExtractInsertInst': exec_template_1dt_3srcs,
'ClassInst': exec_template_1dt_2src_1dest,
'CmpInst': exec_template_2dt,
'CvtInst': exec_template_2dt,
'PopcountInst': exec_template_2dt,
'LdInst': '',
'StInst': '',
'SpecialInstNoSrc': exec_template_nodt_nosrc,
'SpecialInst1Src': exec_template_nodt_1src,
'SpecialInstNoSrcNoDest': '',
'Stub': exec_template_stub,
}
###############
#
# Define code templates for the decoder cases
#
###############
# decode template for nodt-opcode case
decode_nodt_template = '''
case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''
decode_case_prolog_class_inst = '''
case BRIG_OPCODE_$brig_opcode_upper:
{
//const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
//switch (baseOp->kind) {
// case BRIG_OPERAND_REG:
// type = ((const BrigOperandReg*)baseOp)->type;
// break;
// case BRIG_OPERAND_IMMED:
// type = ((const BrigOperandImmed*)baseOp)->type;
// break;
// default:
// fatal("CLASS unrecognized kind of operand %d\\n",
// baseOp->kind);
//}
switch (type) {'''
# common prolog for 1dt- or 2dt-opcode case: switch on data type
decode_case_prolog = '''
case BRIG_OPCODE_$brig_opcode_upper:
{
switch (ib->type) {'''
# single-level decode case entry (for 1dt opcodes)
decode_case_entry = \
' case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
decode_store_prolog = \
' case BRIG_TYPE_$type_name: {'
decode_store_case_epilog = '''
}'''
decode_store_case_entry = \
' return $constructor(ib, obj);'
# common epilog for type switch
decode_case_epilog = '''
default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
ib->type);
}
}
break;'''
# Additional templates for nested decode on a second type field (for
# compare and convert). These are used in place of the
# decode_case_entry template to create a second-level switch on on the
# second type field inside each case of the first-level type switch.
# Because the name and location of the second type can vary, the Brig
# instruction type must be provided in $brig_type, and the name of the
# second type field must be provided in $type_field.
decode_case2_prolog = '''
case BRIG_TYPE_$type_name:
switch (((Brig$brig_type*)ib)->$type2_field) {'''
decode_case2_entry = \
' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
decode_case2_epilog = '''
default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
((Brig$brig_type*)ib)->$type2_field);
}
break;'''
# Figure out how many source operands an expr needs by looking for the
# highest-numbered srcN value referenced. Since sources are numbered
# starting at 0, the return value is N+1.
def num_src_operands(expr):
    """Return how many source operands an expression consumes.

    Scans for the highest-numbered 'srcN' substring referenced; since
    sources are numbered from 0, the answer is N + 1.  An expression
    that mentions no source at all needs 0 operands.
    """
    for highest in (2, 1, 0):
        if ('src%d' % highest) in expr:
            return highest + 1
    return 0
###############
#
# Define final code generation methods
#
# The gen_nodt, and gen_1dt, and gen_2dt methods are the interface for
# generating actual instructions.
#
###############
# Generate class declaration, exec function, and decode switch case
# for an brig_opcode with a single-level type switch. The 'types'
# parameter is a list or tuple of types for which the instruction
# should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit the class declaration, exec function, and decode case(s) for
    one BRIG opcode.

    NOTE(review): the code_formatter templates appended below reference
    '$class_name', '$constructor', '$expr', etc. -- presumably expanded
    from this function's *local variables* by m5.util's code_formatter
    (frame inspection); renaming any local here would change the
    generated C++.  Confirm against m5.util before restyling.

    brig_opcode        -- BRIG opcode name; also used as the C++ class name.
    types              -- None/str for an untyped (single) instruction, or a
                          list/tuple of type names for a one-level type switch.
    expr               -- RHS expression for the exec template; required for
                          ArithInst (num_srcs is derived from it).
    base_class         -- C++ base class; selects header/exec templates.
    type2_info         -- (type2_field, types2) for a nested second-level
                          type switch (compare/convert).
    constructor_prefix -- 'new ' for ordinary ctors, 'decode' for the
                          memory-op factory functions.
    is_store           -- emit the store-style decode cases (extra scope
                          per type) instead of the plain one-level cases.
    """
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # note that expr must be provided with ArithInst so we can
        # derive num_srcs for the template
        assert expr

    if expr:
        # Derive several bits of info from expr. If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = expr.find('dest') != -1
        dest_is_src_flag = str(dest_is_src).lower() # for C++
        # Rewrite srcN references to match how each exec template names
        # its source values: arrays (src_val[N]) for the variable-source
        # templates, scalars (src_valN) otherwise.
        if base_class in ['ShiftInst']:
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        elif base_class in ['ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst']:
            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
        else:
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])
    # Special/Stub exec bodies go in the .cc file; templated exec bodies
    # (including the shift variant) must live in the header.
    if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, to basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name, type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # single list of types, to basic one-level decode
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    src_size = 32
                    type_type = type_name[0]
                    full_class_name = '%s<%s,%s>' % (class_name, \
                                                     type_name.upper(), \
                                                     '%s%d' % \
                                                     (type_type.upper(), \
                                                      type_size))
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)
                decoder_code(decode_case2_epilog)
        decoder_code(decode_case_epilog)
###############
#
# Generate instructions
#
###############
# handy abbreviations for common sets of types
# arithmetic ops are typically defined only on 32- and 64-bit sizes
arith_int_types = ('S32', 'U32', 'S64', 'U64')
arith_float_types = ('F32', 'F64')
arith_types = arith_int_types + arith_float_types
bit_types = ('B1', 'B32', 'B64')
all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types
# I think you might be able to do 'f16' memory ops too, but we'll
# ignore them for now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + ('B32', 'B64')
##### Arithmetic & logical operations
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')
gen('CopySign', arith_float_types,
'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')
# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
'(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')
gen('Ncos', arith_float_types, 'cos(src0)');
gen('Nsin', arith_float_types, 'sin(src0)');
gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')
gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)')
gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst', \
('sourceType', ('B32', 'B64')))
gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')
# gen('Mul_hi', types=('s32','u32', '??'))
# gen('Mul24', types=('s32','u32', '??'))
gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')
gen('Abs', arith_types, 'std::abs(src0)')
gen('Neg', arith_types, '-src0')
gen('Mov', bit_types + arith_types, 'src0')
gen('Not', bit_types, 'heynot(src0)')
# mad and fma differ only in rounding behavior, which we don't emulate
# also there's an integer form of mad, but not of fma
gen('Mad', arith_types, 'src0 * src1 + src2')
gen('Fma', arith_float_types, 'src0 * src1 + src2')
#native floating point operations
gen('Nfma', arith_float_types, 'src0 * src1 + src2')
gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')
# see base/bitfield.hh
gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
'ExtractInsertInst')
gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
'ExtractInsertInst')
##### Compare
gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
'CmpInst', ('sourceType', arith_types + bit_types))
gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')
##### Conversion
# Conversion operations are only defined on B1, not B32 or B64
cvt_types = ('B1',) + mem_types
gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))
##### Load & Store
gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
is_store=True)
gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
gen('AtomicNoRet', mem_atom_types, base_class='StInst',
constructor_prefix='decode')
gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
gen('Br', base_class = 'LdInst', constructor_prefix='decode')
##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a 'special' instruction (built-in value read such as
    WorkItemId, LaneId, Clock).

    brig_opcode -- BRIG opcode name, forwarded to gen().
    expr        -- C++ expression yielding the value; may reference at
                   most one source operand (src0).
    dest_type   -- destination register type (default 'U32').
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # Bug fix: this was 'assert false' (lowercase), which would have
        # raised a confusing NameError instead of an AssertionError.
        assert False, \
            'special insts support at most one source operand: %s' % expr
    gen(brig_opcode, None, expr, base_class)
gen_special('WorkItemId', 'w->workItemId[src0][lane]')
gen_special('WorkItemAbsId',
'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
gen_special('WorkGroupId', 'w->workGroupId[src0]')
gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
gen_special('GridSize', 'w->gridSz[src0]')
gen_special('GridGroups',
'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
gen_special('LaneId', 'lane')
gen_special('WaveId', 'w->wfId')
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
# gen_special('CU'', ')
gen('Ret', base_class='SpecialInstNoSrcNoDest')
gen('Barrier', base_class='SpecialInstNoSrcNoDest')
gen('MemFence', base_class='SpecialInstNoSrcNoDest')
# Map magic instructions to the BrigSyscall opcode
# Magic instructions are defined in magic.hh
#
# In the future, real HSA kernel system calls can be implemented and coexist
# with magic instructions.
gen('Call', base_class='SpecialInstNoSrcNoDest')
# Stubs for unimplemented instructions:
# These may need to be implemented at some point in the future, but
# for now we just match the instructions with their operands.
#
# By defining stubs for these instructions, we can work with
# applications that have them in dead/unused code paths.
#
# Needed for rocm-hcc compilations for HSA backends since
# builtins-hsail library is `cat`d onto the generated kernels.
# The builtins-hsail library consists of handcoded hsail functions
# that __might__ be needed by the rocm-hcc compiler in certain binaries.
gen('Bitmask', base_class='Stub')
gen('Bitrev', base_class='Stub')
gen('Firstbit', base_class='Stub')
gen('Lastbit', base_class='Stub')
gen('Unpacklo', base_class='Stub')
gen('Unpackhi', base_class='Stub')
gen('Pack', base_class='Stub')
gen('Unpack', base_class='Stub')
gen('Lerp', base_class='Stub')
gen('Packcvt', base_class='Stub')
gen('Unpackcvt', base_class='Stub')
gen('Sad', base_class='Stub')
gen('Sadhi', base_class='Stub')
gen('Activelanecount', base_class='Stub')
gen('Activelaneid', base_class='Stub')
gen('Activelanemask', base_class='Stub')
gen('Activelanepermute', base_class='Stub')
gen('Groupbaseptr', base_class='Stub')
gen('Signalnoret', base_class='Stub')
###############
#
# Generate file epilogs
#
###############
header_code('''
template<>
inline void
Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
CType src_val;
src_val = this->src[0].template get<CType>(w, lane);
dest_val = (CType)(src_val);
this->dest.set(w, lane, dest_val);
}
}
}
template<>
inline void
Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
CType src_val;
src_val = this->src[0].template get<CType>(w, lane);
dest_val = (CType)(src_val);
this->dest.set(w, lane, dest_val);
}
}
}
''')
header_code.dedent()
header_code('''
} // namespace HsailISA
''')
# close off main decode switch
decoder_code.dedent()
decoder_code.dedent()
decoder_code('''
default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
} // end switch(ib->opcode)
} // end decode()
} // namespace HsailISA
''')
exec_code.dedent()
exec_code('''
} // namespace HsailISA
''')
###############
#
# Output accumulated code to files
#
###############
header_code.write(sys.argv[1])
decoder_code.write(sys.argv[2])
exec_code.write(sys.argv[3])

View File

@@ -1,77 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#ifndef __ARCH_HSAIL_GPU_DECODER_HH__
#define __ARCH_HSAIL_GPU_DECODER_HH__
#include <vector>
#include "arch/hsail/gpu_types.hh"
class BrigObject;
class GPUStaticInst;
namespace Brig
{
class BrigInstBase;
}
namespace HsailISA
{
class Decoder
{
public:
GPUStaticInst* decode(MachInst machInst);
GPUStaticInst*
decode(RawMachInst inst)
{
return inst < decodedInsts.size() ? decodedInsts.at(inst) : nullptr;
}
RawMachInst
saveInst(GPUStaticInst *decodedInst)
{
decodedInsts.push_back(decodedInst);
return decodedInsts.size() - 1;
}
private:
static std::vector<GPUStaticInst*> decodedInsts;
};
} // namespace HsailISA
#endif // __ARCH_HSAIL_GPU_DECODER_HH__

View File

@@ -1,75 +0,0 @@
/*
* Copyright (c) 2016 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARCH_HSAIL_GPU_ISA_HH__
#define __ARCH_HSAIL_GPU_ISA_HH__
#include <cstdint>
#include "arch/hsail/gpu_types.hh"
#include "base/logging.hh"
#include "base/types.hh"
#include "gpu-compute/misc.hh"
namespace HsailISA
{
class GPUISA
{
public:
GPUISA()
{
}
void
writeMiscReg(int opIdx, RegVal operandVal)
{
fatal("HSAIL does not implement misc registers yet\n");
}
RegVal
readMiscReg(int opIdx) const
{
fatal("HSAIL does not implement misc registers yet\n");
}
bool hasScalarUnit() const { return false; }
uint32_t
advancePC(uint32_t old_pc, GPUDynInstPtr gpuDynInst)
{
return old_pc + sizeof(RawMachInst);
}
};
}
#endif // __ARCH_HSAIL_GPU_ISA_HH__

View File

@@ -1,69 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#ifndef __ARCH_HSAIL_GPU_TYPES_HH__
#define __ARCH_HSAIL_GPU_TYPES_HH__
#include <cstdint>
namespace Brig
{
class BrigInstBase;
}
class BrigObject;
namespace HsailISA
{
// A raw machine instruction represents the raw bits that
// our model uses to represent an actual instruction. In
// the case of HSAIL this is just an index into a list of
// instruction objects.
typedef uint32_t RawMachInst;
// The MachInst is a representation of an instruction
// that has more information than just the machine code.
// For HSAIL the actual machine code is a BrigInstBase
// and the BrigObject contains more pertinent
// information related to operaands, etc.
struct MachInst
{
const Brig::BrigInstBase *brigInstBase;
const BrigObject *brigObj;
};
}
#endif // __ARCH_HSAIL_GPU_TYPES_HH__

View File

@@ -1,86 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#include "arch/hsail/insts/branch.hh"
#include "gpu-compute/hsail_code.hh"
namespace HsailISA
{
GPUStaticInst*
decodeBrn(const Brig::BrigInstBase *ib, const BrigObject *obj)
{
// Detect direct vs indirect branch by seeing whether we have a
// register operand.
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
const Brig::BrigOperand *reg = obj->getOperand(op_offs);
if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
return new BrnIndirectInst(ib, obj);
} else {
return new BrnDirectInst(ib, obj);
}
}
GPUStaticInst*
decodeCbr(const Brig::BrigInstBase *ib, const BrigObject *obj)
{
// Detect direct vs indirect branch by seeing whether we have a
// second register operand (after the condition).
unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
const Brig::BrigOperand *reg = obj->getOperand(op_offs);
if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
return new CbrIndirectInst(ib, obj);
} else {
return new CbrDirectInst(ib, obj);
}
}
GPUStaticInst*
decodeBr(const Brig::BrigInstBase *ib, const BrigObject *obj)
{
// Detect direct vs indirect branch by seeing whether we have a
// second register operand (after the condition).
unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
const Brig::BrigOperand *reg = obj->getOperand(op_offs);
if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
return new BrIndirectInst(ib, obj);
} else {
return new BrDirectInst(ib, obj);
}
}
} // namespace HsailISA

View File

@@ -1,441 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
#define __ARCH_HSAIL_INSTS_BRANCH_HH__
#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/wavefront.hh"
namespace HsailISA
{
// The main difference between a direct branch and an indirect branch
// is whether the target is a register or a label, so we can share a
// lot of code if we template the base implementation on that type.
template<typename TargetType>
class BrnInstBase : public HsailGPUStaticInst
{
public:
void generateDisassembly() override;
Brig::BrigWidth8_t width;
TargetType target;
BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "brn")
{
setFlag(Branch);
setFlag(UnconditionalJump);
width = ((Brig::BrigInstBr*)ib)->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
target.init(op_offs, obj);
}
uint32_t getTargetPc() override { return target.getTarget(0, 0); }
bool isVectorRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.isVectorRegister();
}
bool isCondRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.isCondRegister();
}
bool isScalarRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.isScalarRegister();
}
bool isSrcOperand(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return true;
}
bool isDstOperand(int operandIndex) override {
return false;
}
int getOperandSize(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.opSize();
}
int
getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
{
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.regIndex();
}
int getNumOperands() override {
return 1;
}
void execute(GPUDynInstPtr gpuDynInst) override;
};
template<typename TargetType>
void
BrnInstBase<TargetType>::generateDisassembly()
{
std::string widthClause;
if (width != 1) {
widthClause = csprintf("_width(%d)", width);
}
disassembly = csprintf("%s%s %s", opcode, widthClause,
target.disassemble());
}
template<typename TargetType>
void
BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
if (getTargetPc() == w->rpc()) {
w->popFromReconvergenceStack();
} else {
// Rpc and execution mask remain the same
w->pc(getTargetPc());
}
}
class BrnDirectInst : public BrnInstBase<LabelOperand>
{
public:
BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
: BrnInstBase<LabelOperand>(ib, obj)
{
}
int numSrcRegOperands() { return 0; }
int numDstRegOperands() { return 0; }
};
class BrnIndirectInst : public BrnInstBase<SRegOperand>
{
public:
BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
: BrnInstBase<SRegOperand>(ib, obj)
{
}
int numSrcRegOperands() { return target.isVectorRegister(); }
int numDstRegOperands() { return 0; }
};
GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
const BrigObject *obj);
template<typename TargetType>
class CbrInstBase : public HsailGPUStaticInst
{
public:
void generateDisassembly() override;
Brig::BrigWidth8_t width;
CRegOperand cond;
TargetType target;
CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "cbr")
{
setFlag(Branch);
width = ((Brig::BrigInstBr *)ib)->width;
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
cond.init(op_offs, obj);
op_offs = obj->getOperandPtr(ib->operands, 1);
target.init(op_offs, obj);
}
uint32_t getTargetPc() override { return target.getTarget(0, 0); }
void execute(GPUDynInstPtr gpuDynInst) override;
// Assumption: Target is operand 0, Condition Register is operand 1
bool isVectorRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
if (!operandIndex)
return target.isVectorRegister();
else
return false;
}
bool isCondRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
if (!operandIndex)
return target.isCondRegister();
else
return true;
}
bool isScalarRegister(int operandIndex) override {
assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
if (!operandIndex)
return target.isScalarRegister();
else
return false;
}
bool isSrcOperand(int operandIndex) override {
assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
if (operandIndex == 0)
return true;
return false;
}
// both Condition Register and Target are source operands
bool isDstOperand(int operandIndex) override {
return false;
}
int getOperandSize(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
if (!operandIndex)
return target.opSize();
else
return 1;
}
int
getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
{
assert(operandIndex >= 0 && operandIndex < getNumOperands());
if (!operandIndex)
return target.regIndex();
else
return -1;
}
// Operands = Target, Condition Register
int getNumOperands() override {
return 2;
}
};
template<typename TargetType>
void
CbrInstBase<TargetType>::generateDisassembly()
{
std::string widthClause;
if (width != 1) {
widthClause = csprintf("_width(%d)", width);
}
disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
cond.disassemble(), target.disassemble());
}
template<typename TargetType>
void
CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const uint32_t curr_pc M5_VAR_USED = w->pc();
const uint32_t curr_rpc = w->rpc();
const VectorMask curr_mask = w->execMask();
/**
* TODO: can we move this pop outside the instruction, and
* into the wavefront?
*/
w->popFromReconvergenceStack();
// immediate post-dominator instruction
const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
if (curr_rpc != rpc) {
w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
}
// taken branch
const uint32_t true_pc = getTargetPc();
VectorMask true_mask;
for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
}
// not taken branch
const uint32_t false_pc = nextInstAddr();
assert(true_pc != false_pc);
if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
VectorMask false_mask = curr_mask & ~true_mask;
w->pushToReconvergenceStack(false_pc, rpc, false_mask);
}
if (true_pc != rpc && true_mask.count()) {
w->pushToReconvergenceStack(true_pc, rpc, true_mask);
}
assert(w->pc() != curr_pc);
}
class CbrDirectInst : public CbrInstBase<LabelOperand>
{
public:
CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
: CbrInstBase<LabelOperand>(ib, obj)
{
}
// the source operand of a conditional branch is a Condition
// Register which is not stored in the VRF
// so we do not count it as a source-register operand
// even though, formally, it is one.
int numSrcRegOperands() { return 0; }
int numDstRegOperands() { return 0; }
};
class CbrIndirectInst : public CbrInstBase<SRegOperand>
{
public:
CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
: CbrInstBase<SRegOperand>(ib, obj)
{
}
// one source operand of the conditional indirect branch is a Condition
// register which is not stored in the VRF so we do not count it
// as a source-register operand even though, formally, it is one.
int numSrcRegOperands() { return target.isVectorRegister(); }
int numDstRegOperands() { return 0; }
};
GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
const BrigObject *obj);
template<typename TargetType>
class BrInstBase : public HsailGPUStaticInst
{
public:
void generateDisassembly() override;
ImmOperand<uint32_t> width;
TargetType target;
BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
: HsailGPUStaticInst(obj, "br")
{
setFlag(Branch);
setFlag(UnconditionalJump);
width.init(((Brig::BrigInstBr *)ib)->width, obj);
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
target.init(op_offs, obj);
}
uint32_t getTargetPc() override { return target.getTarget(0, 0); }
void execute(GPUDynInstPtr gpuDynInst) override;
bool isVectorRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.isVectorRegister();
}
bool isCondRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.isCondRegister();
}
bool isScalarRegister(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.isScalarRegister();
}
bool isSrcOperand(int operandIndex) override {
assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
return true;
}
bool isDstOperand(int operandIndex) override { return false; }
int getOperandSize(int operandIndex) override {
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.opSize();
}
int
getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
{
assert(operandIndex >= 0 && operandIndex < getNumOperands());
return target.regIndex();
}
int getNumOperands() override { return 1; }
};
template<typename TargetType>
void
BrInstBase<TargetType>::generateDisassembly()
{
std::string widthClause;
if (width.bits != 1) {
widthClause = csprintf("_width(%d)", width.bits);
}
disassembly = csprintf("%s%s %s", opcode, widthClause,
target.disassemble());
}
template<typename TargetType>
void
BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
if (getTargetPc() == w->rpc()) {
w->popFromReconvergenceStack();
} else {
// Rpc and execution mask remain the same
w->pc(getTargetPc());
}
}
class BrDirectInst : public BrInstBase<LabelOperand>
{
public:
BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
: BrInstBase<LabelOperand>(ib, obj)
{
}
int numSrcRegOperands() { return 0; }
int numDstRegOperands() { return 0; }
};
class BrIndirectInst : public BrInstBase<SRegOperand>
{
public:
BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
: BrInstBase<SRegOperand>(ib, obj)
{
}
int numSrcRegOperands() { return target.isVectorRegister(); }
int numDstRegOperands() { return 0; }
};
GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib,
const BrigObject *obj);
} // namespace HsailISA
#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__

File diff suppressed because it is too large Load Diff

View File

@@ -1,53 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#include "arch/hsail/insts/gpu_static_inst.hh"
#include "gpu-compute/brig_object.hh"
namespace HsailISA
{
HsailGPUStaticInst::HsailGPUStaticInst(const BrigObject *obj,
const std::string &opcode)
: GPUStaticInst(opcode), hsailCode(obj->currentCode)
{
}
void
HsailGPUStaticInst::generateDisassembly()
{
disassembly = opcode;
}
} // namespace HsailISA

View File

@@ -1,66 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#ifndef __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
#define __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
/*
* @file gpu_static_inst.hh
*
* Defines the base class representing HSAIL GPU static instructions.
*/
#include "arch/hsail/gpu_types.hh"
#include "gpu-compute/gpu_static_inst.hh"
class BrigObject;
class HsailCode;
namespace HsailISA
{
class HsailGPUStaticInst : public GPUStaticInst
{
public:
HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode);
void generateDisassembly() override;
int instSize() const override { return sizeof(RawMachInst); }
bool isValid() const override { return true; }
protected:
HsailCode *hsailCode;
};
} // namespace HsailISA
#endif // __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__

View File

@@ -1,209 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#include "arch/hsail/insts/decl.hh"
#include "debug/GPUExec.hh"
#include "gpu-compute/dispatcher.hh"
#include "gpu-compute/simple_pool_manager.hh"
namespace HsailISA
{
template<> const char *B1::label = "b1";
template<> const char *B8::label = "b8";
template<> const char *B16::label = "b16";
template<> const char *B32::label = "b32";
template<> const char *B64::label = "b64";
template<> const char *S8::label = "s8";
template<> const char *S16::label = "s16";
template<> const char *S32::label = "s32";
template<> const char *S64::label = "s64";
template<> const char *U8::label = "u8";
template<> const char *U16::label = "u16";
template<> const char *U32::label = "u32";
template<> const char *U64::label = "u64";
template<> const char *F32::label = "f32";
template<> const char *F64::label = "f64";
const char*
cmpOpToString(Brig::BrigCompareOperation cmpOp)
{
using namespace Brig;
switch (cmpOp) {
case BRIG_COMPARE_EQ:
return "eq";
case BRIG_COMPARE_NE:
return "ne";
case BRIG_COMPARE_LT:
return "lt";
case BRIG_COMPARE_LE:
return "le";
case BRIG_COMPARE_GT:
return "gt";
case BRIG_COMPARE_GE:
return "ge";
case BRIG_COMPARE_EQU:
return "equ";
case BRIG_COMPARE_NEU:
return "neu";
case BRIG_COMPARE_LTU:
return "ltu";
case BRIG_COMPARE_LEU:
return "leu";
case BRIG_COMPARE_GTU:
return "gtu";
case BRIG_COMPARE_GEU:
return "geu";
case BRIG_COMPARE_NUM:
return "num";
case BRIG_COMPARE_NAN:
return "nan";
case BRIG_COMPARE_SEQ:
return "seq";
case BRIG_COMPARE_SNE:
return "sne";
case BRIG_COMPARE_SLT:
return "slt";
case BRIG_COMPARE_SLE:
return "sle";
case BRIG_COMPARE_SGT:
return "sgt";
case BRIG_COMPARE_SGE:
return "sge";
case BRIG_COMPARE_SGEU:
return "sgeu";
case BRIG_COMPARE_SEQU:
return "sequ";
case BRIG_COMPARE_SNEU:
return "sneu";
case BRIG_COMPARE_SLTU:
return "sltu";
case BRIG_COMPARE_SLEU:
return "sleu";
case BRIG_COMPARE_SNUM:
return "snum";
case BRIG_COMPARE_SNAN:
return "snan";
case BRIG_COMPARE_SGTU:
return "sgtu";
default:
return "unknown";
}
}
// Execute an HSAIL ret: retire the work-items selected by the current
// predicate mask and, once every work-item in the wavefront has returned,
// tear the wavefront down (free LDS/VRF resources, fence or reschedule).
void
Ret::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    // mask off completed work-items
    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            w->initMask[lane] = 0;
        }
    }

    // delete extra instructions fetched for completed work-items
    w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
                               w->instructionBuffer.end());

    if (w->pendingFetch) {
        w->dropFetch = true;
    }

    // if all work-items have completed, then wave-front is done
    if (w->initMask.none()) {
        w->status = Wavefront::S_STOPPED;

        // release this wavefront's claim on the work-group's LDS space;
        // refCount hits zero when the whole work-group has finished
        int32_t refCount = w->computeUnit->getLds().
                               decreaseRefCounter(w->dispatchId, w->wgId);

        DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
                w->computeUnit->cu_id, w->wgId, refCount);

        // free the vector registers of the completed wavefront
        w->computeUnit->vectorRegsReserved[w->simdId] -=
            w->reservedVectorRegs;

        assert(w->computeUnit->vectorRegsReserved[w->simdId] >= 0);

        // the VRF region wraps around, hence the modulo on the end index
        uint32_t endIndex = (w->startVgprIndex +
                             w->reservedVectorRegs - 1) %
            w->computeUnit->vrf[w->simdId]->numRegs();

        w->computeUnit->vrf[w->simdId]->manager->
            freeRegion(w->startVgprIndex, endIndex);

        w->reservedVectorRegs = 0;
        w->startVgprIndex = 0;
        w->computeUnit->completedWfs++;

        DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
                w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId);

        if (!refCount) {
            // last wavefront of the work-group: emit a system-scope
            // release fence so the kernel's results become visible
            setFlag(SystemScope);
            setFlag(Release);
            setFlag(GlobalSegment);
            // Notify Memory System of Kernel Completion
            // Kernel End = isKernel + isRelease
            w->status = Wavefront::S_RETURNING;
            GPUDynInstPtr local_mempacket = gpuDynInst;
            local_mempacket->useContinuation = false;
            local_mempacket->simdId = w->simdId;
            local_mempacket->wfSlotId = w->wfSlotId;
            local_mempacket->wfDynId = w->wfDynId;
            w->computeUnit->injectGlobalMemFence(local_mempacket, true);
        } else {
            // other wavefronts of this work-group are still running;
            // let the dispatcher try to schedule new work
            w->computeUnit->shader->dispatcher->scheduleDispatch();
        }
    }
}
// Execute an HSAIL barrier: advance this wavefront's barrier count and
// stall it until the rest of the work-group arrives.
void
Barrier::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();

    // a new barrier may only be entered once the previous one has
    // completely drained
    assert(wf->barrierCnt == wf->oldBarrierCnt);

    wf->barrierCnt = wf->oldBarrierCnt + 1;
    wf->stalledAtBarrier = true;
}
} // namespace HsailISA

View File

@@ -1,76 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#include "arch/hsail/insts/mem.hh"
#include "arch/hsail/Brig.h"
using namespace Brig;
namespace HsailISA
{
const char* atomicOpToString(BrigAtomicOperation brigOp);

// Map a BRIG atomic opcode to the mnemonic used in disassembly output;
// unrecognized opcodes yield "unknown".
const char*
atomicOpToString(BrigAtomicOperation brigOp)
{
    const char *mnemonic = "unknown";

    switch (brigOp) {
      case BRIG_ATOMIC_AND:     mnemonic = "and";  break;
      case BRIG_ATOMIC_OR:      mnemonic = "or";   break;
      case BRIG_ATOMIC_XOR:     mnemonic = "xor";  break;
      case BRIG_ATOMIC_CAS:     mnemonic = "cas";  break;
      case BRIG_ATOMIC_EXCH:    mnemonic = "exch"; break;
      case BRIG_ATOMIC_ADD:     mnemonic = "add";  break;
      case BRIG_ATOMIC_WRAPINC: mnemonic = "inc";  break;
      case BRIG_ATOMIC_WRAPDEC: mnemonic = "dec";  break;
      case BRIG_ATOMIC_MIN:     mnemonic = "min";  break;
      case BRIG_ATOMIC_MAX:     mnemonic = "max";  break;
      case BRIG_ATOMIC_SUB:     mnemonic = "sub";  break;
      default:                                     break;
    }

    return mnemonic;
}
} // namespace HsailISA

File diff suppressed because it is too large Load Diff

View File

@@ -1,648 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#include "gpu-compute/hsail_code.hh"
// defined in code.cc, but not worth sucking in all of code.h for this
// at this point
extern const char *segmentNames[];
namespace HsailISA
{
// Build the human-readable form of an lda instruction:
// "<opcode>_<type> <dest>,<addr>".
template<typename DestDataType, typename AddrRegOperandType>
void
LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
{
    const std::string dest_str = this->dest.disassemble();
    const std::string addr_str = this->addr.disassemble();

    this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
                                 DestDataType::label, dest_str, addr_str);
}
// lda: compute the effective address for every active lane and write it
// into the destination register. No memory request is issued.
template<typename DestDataType, typename AddrRegOperandType>
void
LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename DestDataType::CType CType M5_VAR_USED;
    const VectorMask &mask = w->getPred();
    const int num_lanes = w->computeUnit->wfSize();

    // one effective address per lane
    std::vector<Addr> lane_addrs(num_lanes, (Addr)0);
    this->addr.calcVector(w, lane_addrs);

    for (int lane = 0; lane < num_lanes; ++lane) {
        if (mask[lane]) {
            this->dest.set(w, lane, lane_addrs[lane]);
        }
    }
}
// Build the disassembly string for a load: opcode, segment and memory
// type, then the destination register(s) — vector loads (ld_v2/v3/v4)
// write into 2-4 registers — followed by the address operand.
template<typename MemDataType, typename DestDataType,
         typename AddrRegOperandType>
void
LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly()
{
    switch (num_dest_operands) {
      case 1:
        this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                     segmentNames[this->segment],
                                     MemDataType::label,
                                     this->dest.disassemble(),
                                     this->addr.disassemble());
        break;
      case 2:
        this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                     segmentNames[this->segment],
                                     MemDataType::label,
                                     this->dest_vect[0].disassemble(),
                                     this->dest_vect[1].disassemble(),
                                     this->addr.disassemble());
        break;
      case 3:
        this->disassembly = csprintf("%s_%s_%s (%s,%s,%s), %s", this->opcode,
                                     segmentNames[this->segment],
                                     MemDataType::label,
                                     this->dest_vect[0].disassemble(),
                                     this->dest_vect[1].disassemble(),
                                     this->dest_vect[2].disassemble(),
                                     this->addr.disassemble());
        break;
      case 4:
        this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                     this->opcode,
                                     segmentNames[this->segment],
                                     MemDataType::label,
                                     this->dest_vect[0].disassemble(),
                                     this->dest_vect[1].disassemble(),
                                     this->dest_vect[2].disassemble(),
                                     this->dest_vect[3].disassemble(),
                                     this->addr.disassemble());
        break;
      default:
        fatal("Bad ld register dest operand, num vector operands: %d \n",
              num_dest_operands);
        break;
    }
}
// Translate a compiler-produced private-segment offset into a machine
// address. The private spaces of the work-items in a wavefront are
// interleaved at an 8-byte granularity: not as coalescing-friendly as the
// spill-space layout, but usable when the same address may be touched by
// differently-sized loads/stores. The compiler does not emit per-symbol
// size information, so the per-item spaces are simply lined up back to
// back (see the disabled symbol-lookup variant in the original source).
// NOTE: this assumes the largest private-segment access is 8 bytes; a
// larger access would require a bigger interleave stride.
static Addr
calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
{
    const int wf_size = w->computeUnit->wfSize();

    // split the offset into its 8-byte-aligned chunk and the byte
    // position inside that chunk
    const Addr chunk_idx = addr / 8;
    const Addr chunk_off = addr % 8;

    const Addr eff_addr = chunk_idx * 8 * wf_size + lane * 8 +
        chunk_off + w->privBase;

    // the result must stay inside this wavefront's private region
    assert(eff_addr < w->privBase + (w->privSizePerItem * wf_size));

    return eff_addr;
}
// Execute a load. Kernarg and arg segment reads are serviced immediately
// from wavefront-local buffers (no memory request); every other segment
// builds a GPUDynInst memory request, rewrites the per-lane segment
// offsets into machine addresses, and issues the request to the global
// or local memory pipeline, updating the wavefront's outstanding-request
// accounting.
template<typename MemDataType, typename DestDataType,
         typename AddrRegOperandType>
void
LdInst<MemDataType, DestDataType,
       AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename MemDataType::CType MemCType;
    const VectorMask &mask = w->getPred();

    // Kernarg references are handled uniquely for now (no Memory Request
    // is used), so special-case them up front. Someday we should
    // make this more realistic, at which we should get rid of this
    // block and fold this case into the switch below.
    if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
        MemCType val;

        // I assume no vector ld for kernargs
        assert(num_dest_operands == 1);

        // assuming for the moment that we'll never do register
        // offsets into kernarg space... just to make life simpler
        uint64_t address = this->addr.calcUniform();

        val = *(MemCType*)&w->kernelArgs[address];

        DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);

        // broadcast the (uniform) kernarg value to all active lanes
        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                this->dest.set(w, lane, val);
            }
        }

        return;
    } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
        uint64_t address = this->addr.calcUniform();
        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                MemCType val = w->readCallArgMem<MemCType>(lane, address);

                DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
                        (unsigned long long)val);

                this->dest.set(w, lane, val);
            }
        }

        return;
    }

    GPUDynInstPtr m = gpuDynInst;

    // per-lane effective addresses (still segment offsets at this point)
    this->addr.calcVector(w, m->addr);

    m->m_type = MemDataType::memType;
    m->v_type = DestDataType::vgprType;

    m->exec_mask = w->execMask();
    m->statusBitVector = 0;
    m->equiv = this->equivClass;

    // record the destination register(s) for writeback
    if (num_dest_operands == 1) {
        m->dst_reg = this->dest.regIndex();
        m->n_reg = 1;
    } else {
        m->n_reg = num_dest_operands;
        for (int i = 0; i < num_dest_operands; ++i) {
            m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
        }
    }

    m->simdId = w->simdId;
    m->wfSlotId = w->wfSlotId;
    m->wfDynId = w->wfDynId;
    m->kern_id = w->kernId;
    m->cu_id = w->computeUnit->cu_id;
    m->latency.init(&w->computeUnit->shader->tick_cnt);

    switch (this->segment) {
      case Brig::BRIG_SEGMENT_GLOBAL:
        m->pipeId = GLBMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(1));

        // this is a complete hack to get around a compiler bug
        // (the compiler currently generates global access for private
        // addresses (starting from 0). We need to add the private offset)
        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            if (m->addr[lane] < w->privSizePerItem) {
                if (mask[lane]) {
                    // what is the size of the object we are accessing?
                    // find base for for this wavefront

                    // calcPrivAddr will fail if accesses are unaligned
                    assert(!((sizeof(MemCType) - 1) & m->addr[lane]));

                    Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                 this);

                    m->addr[lane] = privAddr;
                }
            }
        }

        w->computeUnit->globalMemoryPipe.issueRequest(m);
        w->outstandingReqsRdGm++;
        w->rdGmReqsInPipe--;
        break;

      case Brig::BRIG_SEGMENT_SPILL:
        assert(num_dest_operands == 1);
        m->pipeId = GLBMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(1));
        {
            // spill space is laid out so that same-offset accesses from
            // neighboring lanes coalesce
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                // note: this calculation will NOT WORK if the compiler
                // ever generates loads/stores to the same address with
                // different widths (e.g., a ld_u32 addr and a ld_u16 addr)
                if (mask[lane]) {
                    assert(m->addr[lane] < w->spillSizePerItem);

                    m->addr[lane] = m->addr[lane] * w->spillWidth +
                                    lane * sizeof(MemCType) + w->spillBase;

                    w->lastAddr[lane] = m->addr[lane];
                }
            }
        }

        w->computeUnit->globalMemoryPipe.issueRequest(m);
        w->outstandingReqsRdGm++;
        w->rdGmReqsInPipe--;
        break;

      case Brig::BRIG_SEGMENT_GROUP:
        // group (LDS) loads go through the local memory pipeline
        m->pipeId = LDSMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(24));
        w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
        w->outstandingReqsRdLm++;
        w->rdLmReqsInPipe--;
        break;

      case Brig::BRIG_SEGMENT_READONLY:
        m->pipeId = GLBMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(1));

        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                // the whole access must fit inside the read-only region
                assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
                m->addr[lane] += w->roBase;
            }
        }

        w->computeUnit->globalMemoryPipe.issueRequest(m);
        w->outstandingReqsRdGm++;
        w->rdGmReqsInPipe--;
        break;

      case Brig::BRIG_SEGMENT_PRIVATE:
        m->pipeId = GLBMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(1));
        {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    assert(m->addr[lane] < w->privSizePerItem);

                    m->addr[lane] = m->addr[lane] +
                        lane * sizeof(MemCType) + w->privBase;
                }
            }
        }
        w->computeUnit->globalMemoryPipe.issueRequest(m);
        w->outstandingReqsRdGm++;
        w->rdGmReqsInPipe--;
        break;

      default:
        fatal("Load to unsupported segment %d %llxe\n", this->segment,
              m->addr[0]);
    }

    w->outstandingReqs++;
    w->memReqsInPipe--;
}
// Execute a store. Arg-segment writes go straight to the wavefront's
// call-argument buffer (no memory request); every other segment gathers
// the per-lane source data into the request's d_data buffer, rewrites the
// segment offsets into machine addresses, and issues the request to the
// global or local memory pipeline, updating the wavefront's
// outstanding-request accounting.
template<typename OperationType, typename SrcDataType,
         typename AddrRegOperandType>
void
StInst<OperationType, SrcDataType,
       AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename OperationType::CType CType;

    const VectorMask &mask = w->getPred();

    // arg references are handled uniquely for now (no Memory Request
    // is used), so special-case them up front. Someday we should
    // make this more realistic, at which we should get rid of this
    // block and fold this case into the switch below.
    if (this->segment == Brig::BRIG_SEGMENT_ARG) {
        uint64_t address = this->addr.calcUniform();

        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                CType data = this->src.template get<CType>(w, lane);
                DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
                w->writeCallArgMem<CType>(lane, address, data);
            }
        }

        return;
    }

    GPUDynInstPtr m = gpuDynInst;

    m->exec_mask = w->execMask();

    // per-lane effective addresses (still segment offsets at this point)
    this->addr.calcVector(w, m->addr);

    // gather the source data into the request's data buffer; vector
    // stores (st_v2/v4) lay out one wfSize-sized slab per operand
    if (num_src_operands == 1) {
        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                ((CType*)m->d_data)[lane] =
                    this->src.template get<CType>(w, lane);
            }
        }
    } else {
        for (int k= 0; k < num_src_operands; ++k) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
                        this->src_vect[k].template get<CType>(w, lane);
                }
            }
        }
    }

    m->m_type = OperationType::memType;
    m->v_type = OperationType::vgprType;

    m->statusBitVector = 0;
    m->equiv = this->equivClass;

    if (num_src_operands == 1) {
        m->n_reg = 1;
    } else {
        m->n_reg = num_src_operands;
    }

    m->simdId = w->simdId;
    m->wfSlotId = w->wfSlotId;
    m->wfDynId = w->wfDynId;
    m->kern_id = w->kernId;
    m->cu_id = w->computeUnit->cu_id;
    m->latency.init(&w->computeUnit->shader->tick_cnt);

    switch (this->segment) {
      case Brig::BRIG_SEGMENT_GLOBAL:
        m->pipeId = GLBMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(1));

        // this is a complete hack to get around a compiler bug
        // (the compiler currently generates global access for private
        // addresses (starting from 0). We need to add the private offset)
        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                if (m->addr[lane] < w->privSizePerItem) {

                    // calcPrivAddr will fail if accesses are unaligned
                    assert(!((sizeof(CType)-1) & m->addr[lane]));

                    Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                 this);

                    m->addr[lane] = privAddr;
                }
            }
        }

        w->computeUnit->globalMemoryPipe.issueRequest(m);
        w->outstandingReqsWrGm++;
        w->wrGmReqsInPipe--;
        break;

      case Brig::BRIG_SEGMENT_SPILL:
        assert(num_src_operands == 1);
        m->pipeId = GLBMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(1));
        {
            // spill space is laid out so that same-offset accesses from
            // neighboring lanes coalesce (see the matching LdInst case)
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    assert(m->addr[lane] < w->spillSizePerItem);

                    m->addr[lane] = m->addr[lane] * w->spillWidth +
                                    lane * sizeof(CType) + w->spillBase;
                }
            }
        }

        w->computeUnit->globalMemoryPipe.issueRequest(m);
        w->outstandingReqsWrGm++;
        w->wrGmReqsInPipe--;
        break;

      case Brig::BRIG_SEGMENT_GROUP:
        // group (LDS) stores go through the local memory pipeline
        m->pipeId = LDSMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(24));
        w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
        w->outstandingReqsWrLm++;
        w->wrLmReqsInPipe--;
        break;

      case Brig::BRIG_SEGMENT_PRIVATE:
        m->pipeId = GLBMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(1));
        {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    assert(m->addr[lane] < w->privSizePerItem);
                    m->addr[lane] = m->addr[lane] + lane *
                        sizeof(CType)+w->privBase;
                }
            }
        }

        w->computeUnit->globalMemoryPipe.issueRequest(m);
        w->outstandingReqsWrGm++;
        w->wrGmReqsInPipe--;
        break;

      default:
        fatal("Store to unsupported segment %d\n", this->segment);
    }

    w->outstandingReqs++;
    w->memReqsInPipe--;
}
// Build the disassembly string for a store: opcode, segment and operation
// type, then the source register(s) — vector stores (st_v2/v3/v4) read
// 2-4 registers — followed by the address operand.
//
// Fixes vs. the previous version: the 3-operand (st_v3) case was missing
// and fell into fatal(), even though the matching
// LdInst::generateDisassembly handles ld_v3; the fatal() message also
// said "ld" instead of "st" (copy-paste from the load variant).
template<typename OperationType, typename SrcDataType,
         typename AddrRegOperandType>
void
StInst<OperationType, SrcDataType,
       AddrRegOperandType>::generateDisassembly()
{
    switch (num_src_operands) {
      case 1:
        this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                     segmentNames[this->segment],
                                     OperationType::label,
                                     this->src.disassemble(),
                                     this->addr.disassemble());
        break;
      case 2:
        this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                     segmentNames[this->segment],
                                     OperationType::label,
                                     this->src_vect[0].disassemble(),
                                     this->src_vect[1].disassemble(),
                                     this->addr.disassemble());
        break;
      case 3:
        this->disassembly = csprintf("%s_%s_%s (%s,%s,%s), %s", this->opcode,
                                     segmentNames[this->segment],
                                     OperationType::label,
                                     this->src_vect[0].disassemble(),
                                     this->src_vect[1].disassemble(),
                                     this->src_vect[2].disassemble(),
                                     this->addr.disassemble());
        break;
      case 4:
        this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                     this->opcode,
                                     segmentNames[this->segment],
                                     OperationType::label,
                                     this->src_vect[0].disassemble(),
                                     this->src_vect[1].disassemble(),
                                     this->src_vect[2].disassemble(),
                                     this->src_vect[3].disassemble(),
                                     this->addr.disassemble());
        break;
      default: fatal("Bad st register src operand, num vector operands: "
                     "%d \n", num_src_operands);
        break;
    }
}
// Issue an atomic memory operation. The first source operand supplies the
// atomic operand (a_data) for every lane; CAS additionally carries its
// swap/compare value in the second source (x_data). The request is sent
// to the global or local memory pipeline and counts as both a read and a
// write against the wavefront's outstanding-request bookkeeping.
template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
         bool HasDst>
void
AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
           HasDst>::execute(GPUDynInstPtr gpuDynInst)
{
    typedef typename DataType::CType CType;

    Wavefront *w = gpuDynInst->wavefront();

    GPUDynInstPtr m = gpuDynInst;

    // per-lane effective addresses
    this->addr.calcVector(w, m->addr);

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        ((CType *)m->a_data)[lane] =
            this->src[0].template get<CType>(w, lane);
    }

    // load second source operand for CAS
    if (NumSrcOperands > 1) {
        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            ((CType*)m->x_data)[lane] =
                this->src[1].template get<CType>(w, lane);
        }
    }

    assert(NumSrcOperands <= 2);

    m->m_type = DataType::memType;
    m->v_type = DataType::vgprType;

    m->exec_mask = w->execMask();
    m->statusBitVector = 0;
    m->equiv = 0;  // atomics don't have an equivalence class operand
    m->n_reg = 1;

    // atomics with return write the old value into the destination
    if (HasDst) {
        m->dst_reg = this->dest.regIndex();
    }

    m->simdId = w->simdId;
    m->wfSlotId = w->wfSlotId;
    m->wfDynId = w->wfDynId;
    m->kern_id = w->kernId;
    m->cu_id = w->computeUnit->cu_id;
    m->latency.init(&w->computeUnit->shader->tick_cnt);

    switch (this->segment) {
      case Brig::BRIG_SEGMENT_GLOBAL:
        m->latency.set(w->computeUnit->shader->ticks(64));
        m->pipeId = GLBMEM_PIPE;

        w->computeUnit->globalMemoryPipe.issueRequest(m);
        // an atomic is both a read and a write
        w->outstandingReqsWrGm++;
        w->wrGmReqsInPipe--;
        w->outstandingReqsRdGm++;
        w->rdGmReqsInPipe--;
        break;

      case Brig::BRIG_SEGMENT_GROUP:
        m->pipeId = LDSMEM_PIPE;
        m->latency.set(w->computeUnit->shader->ticks(24));
        w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
        w->outstandingReqsWrLm++;
        w->wrLmReqsInPipe--;
        w->outstandingReqsRdLm++;
        w->rdLmReqsInPipe--;
        break;

      default:
        fatal("Atomic op to unsupported segment %d\n",
              this->segment);
    }

    w->outstandingReqs++;
    w->memReqsInPipe--;
}
const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);

// Build the disassembly string for an atomic:
// "<op>_<atomicOp>_<segment>_<type> [<dest>,]<addr>[,<src>...]".
template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
         bool HasDst>
void
AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
           HasDst>::generateDisassembly()
{
    // common "<op>_<atomicOp>_<segment>_<type>" prefix
    const std::string prefix =
        csprintf("%s_%s_%s_%s", this->opcode,
                 atomicOpToString(this->atomicOperation),
                 segmentNames[this->segment], DataType::label);

    if (HasDst) {
        this->disassembly = prefix + " " + this->dest.disassemble() +
            "," + this->addr.disassemble();
    } else {
        this->disassembly = prefix + " " + this->addr.disassemble();
    }

    // append the source operand(s)
    for (int i = 0; i < NumSrcOperands; ++i) {
        this->disassembly += "," + this->src[i].disassemble();
    }
}
} // namespace HsailISA

View File

@@ -1,791 +0,0 @@
/*
* Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Marc Orr
*/
#include <csignal>
#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/mem.hh"
namespace HsailISA
{
// Pseudo (or magic) instructions are overloaded on the hsail call
// instruction, because of its flexible parameter signature.
// To add a new magic instruction:
// 1. Add an entry to the enum.
// 2. Implement it in the switch statement below (Call::exec).
// 3. Add a utility function to hsa/hsail-gpu-compute/util/magicinst.h,
// so its easy to call from an OpenCL kernel.
// This enum should be identical to the enum in
// hsa/hsail-gpu-compute/util/magicinst.h
// Magic opcode values carried in call argument 0 and decoded by
// Call::execPseudoInst below.
enum
{
    // trace printing helpers
    MAGIC_PRINT_WF_32 = 0,
    MAGIC_PRINT_WF_64,
    MAGIC_PRINT_LANE,
    MAGIC_PRINT_LANE_64,
    MAGIC_PRINT_WF_FLOAT,
    // drop into the debugger (raises SIGTRAP)
    MAGIC_SIM_BREAK,
    // cross-lane arithmetic
    MAGIC_PREF_SUM,
    MAGIC_REDUCTION,
    MAGIC_MASKLANE_LOWER,
    MAGIC_MASKLANE_UPPER,
    // software wavefront barrier
    MAGIC_JOIN_WF_BAR,
    MAGIC_WAIT_WF_BAR,
    MAGIC_PANIC,
    // memory operations driven from magic calls
    MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG,
    MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG,
    MAGIC_LOAD_GLOBAL_U32_REG,
    MAGIC_XACT_CAS_LD,
    MAGIC_MOST_SIG_THD,
    MAGIC_MOST_SIG_BROADCAST,
    // wavefront-id-filtered variants of the WF print helpers
    MAGIC_PRINT_WFID_32,
    MAGIC_PRINT_WFID_64
};
// Decode and dispatch a magic pseudo-instruction. The magic opcode is
// read from call argument 0; all active lanes must agree on it (mixed
// opcodes at one PC are a fatal error).
void
Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
{
    const VectorMask &mask = w->getPred();

    int op = 0;
    bool got_op = false;

    // extract the opcode and verify every active lane carries the same one
    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            int src_val0 = src1.get<int>(w, lane, 0);
            if (got_op) {
                if (src_val0 != op) {
                    fatal("Multiple magic instructions per PC not "
                          "supported\n");
                }
            } else {
                op = src_val0;
                got_op = true;
            }
        }
    }

    switch(op) {
      case MAGIC_PRINT_WF_32:
        MagicPrintWF32(w);
        break;
      case MAGIC_PRINT_WF_64:
        MagicPrintWF64(w);
        break;
      case MAGIC_PRINT_LANE:
        MagicPrintLane(w);
        break;
      case MAGIC_PRINT_LANE_64:
        MagicPrintLane64(w);
        break;
      case MAGIC_PRINT_WF_FLOAT:
        MagicPrintWFFloat(w);
        break;
      case MAGIC_SIM_BREAK:
        MagicSimBreak(w);
        break;
      case MAGIC_PREF_SUM:
        MagicPrefixSum(w);
        break;
      case MAGIC_REDUCTION:
        MagicReduction(w);
        break;
      case MAGIC_MASKLANE_LOWER:
        MagicMaskLower(w);
        break;
      case MAGIC_MASKLANE_UPPER:
        MagicMaskUpper(w);
        break;
      case MAGIC_JOIN_WF_BAR:
        MagicJoinWFBar(w);
        break;
      case MAGIC_WAIT_WF_BAR:
        MagicWaitWFBar(w);
        break;
      case MAGIC_PANIC:
        MagicPanic(w);
        break;

      // atomic instructions
      case MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG:
        MagicAtomicNRAddGlobalU32Reg(w, gpuDynInst);
        break;

      case MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG:
        MagicAtomicNRAddGroupU32Reg(w, gpuDynInst);
        break;

      case MAGIC_LOAD_GLOBAL_U32_REG:
        MagicLoadGlobalU32Reg(w, gpuDynInst);
        break;

      case MAGIC_XACT_CAS_LD:
        MagicXactCasLd(w);
        break;

      case MAGIC_MOST_SIG_THD:
        MagicMostSigThread(w);
        break;

      case MAGIC_MOST_SIG_BROADCAST:
        MagicMostSigBroadcast(w);
        break;

      case MAGIC_PRINT_WFID_32:
        MagicPrintWF32ID(w);
        break;

      case MAGIC_PRINT_WFID_64:
        MagicPrintWFID64(w);
        break;

      default: fatal("unrecognized magic instruction: %d\n", op);
    }
}
// Magic op: print one 32-bit value per active lane to the trace; arg 1
// is the value, arg 2 selects hex (non-zero) vs. decimal output.
// Compiled out entirely when tracing is disabled.
void
Call::MagicPrintLane(Wavefront *w)
{
#if TRACING_ON
    const VectorMask &mask = w->getPred();
    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            int src_val1 = src1.get<int>(w, lane, 1);  // value to print
            int src_val2 = src1.get<int>(w, lane, 2);  // hex/dec selector
            if (src_val2) {
                DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
                         disassemble(), w->computeUnit->cu_id, w->simdId,
                         w->wfSlotId, lane, src_val1);
            } else {
                DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
                         disassemble(), w->computeUnit->cu_id, w->simdId,
                         w->wfSlotId, lane, src_val1);
            }
        }
    }
#endif
}
// Magic op: 64-bit variant of MagicPrintLane — print one int64 value per
// active lane; arg 2 selects hex vs. decimal output.
// NOTE(review): the format strings are identical to the 32-bit variant;
// confirm the trace formatter prints the full 64-bit value here.
void
Call::MagicPrintLane64(Wavefront *w)
{
#if TRACING_ON
    const VectorMask &mask = w->getPred();
    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            int64_t src_val1 = src1.get<int64_t>(w, lane, 1);  // value
            int src_val2 = src1.get<int>(w, lane, 2);  // hex/dec selector
            if (src_val2) {
                DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
                         disassemble(), w->computeUnit->cu_id, w->simdId,
                         w->wfSlotId, lane, src_val1);
            } else {
                DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
                         disassemble(), w->computeUnit->cu_id, w->simdId,
                         w->wfSlotId, lane, src_val1);
            }
        }
    }
#endif
}
// Magic op: print arg 1 for every lane of the wavefront as a table,
// eight 8-character fields per row; inactive lanes show as "xxxxxxxx".
// Arg 2 selects hex vs. decimal. Compiled out when tracing is disabled.
void
Call::MagicPrintWF32(Wavefront *w)
{
#if TRACING_ON
    const VectorMask &mask = w->getPred();
    std::string res_str;
    res_str = csprintf("krl_prt (%s)\n", disassemble());

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        // start a new row every 8 lanes, tagged with the dynamic WF id
        if (!(lane & 7)) {
            res_str += csprintf("DB%03d: ", (int)w->wfDynId);
        }

        if (mask[lane]) {
            int src_val1 = src1.get<int>(w, lane, 1);
            int src_val2 = src1.get<int>(w, lane, 2);

            if (src_val2) {
                res_str += csprintf("%08x", src_val1);
            } else {
                res_str += csprintf("%08d", src_val1);
            }
        } else {
            res_str += csprintf("xxxxxxxx");
        }

        if ((lane & 7) == 7) {
            res_str += csprintf("\n");
        } else {
            res_str += csprintf(" ");
        }
    }

    res_str += "\n\n";
    DPRINTFN(res_str.c_str());
#endif
}
// Magic op: same table output as MagicPrintWF32, but only emitted by the
// wavefront whose dynamic id matches arg 3 (a per-WF print filter).
// Compiled out when tracing is disabled.
void
Call::MagicPrintWF32ID(Wavefront *w)
{
#if TRACING_ON
    const VectorMask &mask = w->getPred();
    std::string res_str;
    // holds the WF-id filter from arg 3 (last active lane wins)
    int src_val3 = -1;
    res_str = csprintf("krl_prt (%s)\n", disassemble());

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (!(lane & 7)) {
            res_str += csprintf("DB%03d: ", (int)w->wfDynId);
        }

        if (mask[lane]) {
            int src_val1 = src1.get<int>(w, lane, 1);
            int src_val2 = src1.get<int>(w, lane, 2);
            src_val3 = src1.get<int>(w, lane, 3);

            if (src_val2) {
                res_str += csprintf("%08x", src_val1);
            } else {
                res_str += csprintf("%08d", src_val1);
            }
        } else {
            res_str += csprintf("xxxxxxxx");
        }

        if ((lane & 7) == 7) {
            res_str += csprintf("\n");
        } else {
            res_str += csprintf(" ");
        }
    }

    res_str += "\n\n";
    // only the selected wavefront actually prints
    if (w->wfDynId == src_val3) {
        DPRINTFN(res_str.c_str());
    }
#endif
}
// Magic op: 64-bit variant of MagicPrintWF32 — print arg 1 for every
// lane as a table, four 16-character fields per row; inactive lanes show
// as "xxxxxxxxxxxxxxxx". Arg 2 selects hex vs. decimal.
void
Call::MagicPrintWF64(Wavefront *w)
{
#if TRACING_ON
    const VectorMask &mask = w->getPred();
    std::string res_str;
    res_str = csprintf("krl_prt (%s)\n", disassemble());

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        // start a new row every 4 lanes, tagged with the dynamic WF id
        if (!(lane & 3)) {
            res_str += csprintf("DB%03d: ", (int)w->wfDynId);
        }

        if (mask[lane]) {
            int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
            int src_val2 = src1.get<int>(w, lane, 2);

            if (src_val2) {
                res_str += csprintf("%016x", src_val1);
            } else {
                res_str += csprintf("%016d", src_val1);
            }
        } else {
            res_str += csprintf("xxxxxxxxxxxxxxxx");
        }

        if ((lane & 3) == 3) {
            res_str += csprintf("\n");
        } else {
            res_str += csprintf(" ");
        }
    }

    res_str += "\n\n";
    DPRINTFN(res_str.c_str());
#endif
}
// Magic op: same 64-bit table output as MagicPrintWF64, but only emitted
// by the wavefront whose dynamic id matches arg 3.
void
Call::MagicPrintWFID64(Wavefront *w)
{
#if TRACING_ON
    const VectorMask &mask = w->getPred();
    std::string res_str;
    // holds the WF-id filter from arg 3 (last active lane wins)
    int src_val3 = -1;
    res_str = csprintf("krl_prt (%s)\n", disassemble());

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (!(lane & 3)) {
            res_str += csprintf("DB%03d: ", (int)w->wfDynId);
        }

        if (mask[lane]) {
            int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
            int src_val2 = src1.get<int>(w, lane, 2);
            src_val3 = src1.get<int>(w, lane, 3);

            if (src_val2) {
                res_str += csprintf("%016x", src_val1);
            } else {
                res_str += csprintf("%016d", src_val1);
            }
        } else {
            res_str += csprintf("xxxxxxxxxxxxxxxx");
        }

        if ((lane & 3) == 3) {
            res_str += csprintf("\n");
        } else {
            res_str += csprintf(" ");
        }
    }

    res_str += "\n\n";
    // only the selected wavefront actually prints
    if (w->wfDynId == src_val3) {
        DPRINTFN(res_str.c_str());
    }
#endif
}
// Magic op: print arg 1 as a float for every lane of the wavefront,
// eight fields per row; inactive lanes show as "xxxxxxxx".
void
Call::MagicPrintWFFloat(Wavefront *w)
{
#if TRACING_ON
    const VectorMask &mask = w->getPred();
    std::string res_str;
    res_str = csprintf("krl_prt (%s)\n", disassemble());

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        // start a new row every 8 lanes, tagged with the dynamic WF id
        if (!(lane & 7)) {
            res_str += csprintf("DB%03d: ", (int)w->wfDynId);
        }

        if (mask[lane]) {
            float src_val1 = src1.get<float>(w, lane, 1);
            res_str += csprintf("%08f", src_val1);
        } else {
            res_str += csprintf("xxxxxxxx");
        }

        if ((lane & 7) == 7) {
            res_str += csprintf("\n");
        } else {
            res_str += csprintf(" ");
        }
    }

    res_str += "\n\n";
    DPRINTFN(res_str.c_str());
#endif
}
// raises a signal that GDB will catch
// when done with the break, type "signal 0" in gdb to continue
void
Call::MagicSimBreak(Wavefront *w)
{
std::string res_str;
// print out state for this wavefront and then break
res_str = csprintf("Breakpoint encountered for wavefront %i\n",
w->wfSlotId);
res_str += csprintf(" Kern ID: %i\n", w->kernId);
res_str += csprintf(" Phase ID: %i\n", w->simdId);
res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id);
res_str += csprintf(" Exec mask: ");
for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) {
if (w->execMask(i))
res_str += "1";
else
res_str += "0";
if ((i & 7) == 7)
res_str += " ";
}
res_str += csprintf("(0x%016llx)\n", w->execMask().to_ullong());
res_str += "\nHelpful debugging hints:\n";
res_str += " Check out w->s_reg / w->d_reg for register state\n";
res_str += "\n\n";
DPRINTFN(res_str.c_str());
fflush(stdout);
raise(SIGTRAP);
}
// Magic op: exclusive prefix sum across the wavefront. Each active lane
// receives the sum of arg 1 over all active lanes with a lower index.
void
Call::MagicPrefixSum(Wavefront *w)
{
    const VectorMask &mask = w->getPred();
    int running_total = 0;

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            const int lane_val = src1.get<int>(w, lane, 1);
            // write the total *before* adding this lane's contribution
            dest.set<int>(w, lane, running_total);
            running_total += lane_val;
        }
    }
}
// Magic op: all-reduce across the wavefront. Sums arg 1 over every
// active lane, then broadcasts the total back to each active lane.
void
Call::MagicReduction(Wavefront *w)
{
    const VectorMask &mask = w->getPred();
    const int wf_size = w->computeUnit->wfSize();

    // accumulate contributions from the active lanes
    int total = 0;
    for (int lane = 0; lane < wf_size; ++lane) {
        if (mask[lane]) {
            total += src1.get<int>(w, lane, 1);
        }
    }

    // broadcast the result
    for (int lane = 0; lane < wf_size; ++lane) {
        if (mask[lane]) {
            dest.set<int>(w, lane, total);
        }
    }
}
// Magic op: build a bitmask over the lower half of the wavefront. Bit
// <lane> is set when that lane is active, below the half-way point, and
// supplies a non-zero arg 1; the mask is broadcast to all active lanes.
void
Call::MagicMaskLower(Wavefront *w)
{
    const VectorMask &mask = w->getPred();
    const int wf_size = w->computeUnit->wfSize();
    const int half = wf_size / 2;

    uint32_t lower_bits = 0;
    for (int lane = 0; lane < wf_size; ++lane) {
        if (mask[lane] && src1.get<int>(w, lane, 1) && lane < half) {
            lower_bits |= (uint32_t)1 << lane;
        }
    }

    for (int lane = 0; lane < wf_size; ++lane) {
        if (mask[lane]) {
            dest.set<int>(w, lane, (int)lower_bits);
        }
    }
}
// Magic op: build a bitmask over the upper half of the wavefront. Bit
// <lane - wfSize/2> is set when that lane is active, in the upper half,
// and supplies a non-zero arg 1; the mask is broadcast to active lanes.
void
Call::MagicMaskUpper(Wavefront *w)
{
    const VectorMask &mask = w->getPred();
    const int wf_size = w->computeUnit->wfSize();
    const int half = wf_size / 2;

    uint32_t upper_bits = 0;
    for (int lane = 0; lane < wf_size; ++lane) {
        if (mask[lane] && src1.get<int>(w, lane, 1) && lane >= half) {
            upper_bits |= (uint32_t)1 << (lane - half);
        }
    }

    for (int lane = 0; lane < wf_size; ++lane) {
        if (mask[lane]) {
            dest.set<int>(w, lane, (int)upper_bits);
        }
    }
}
// Magic op: join the software wavefront barrier. Each active lane's
// barrier count is incremented, and the wavefront-wide maximum is raised
// if any lane now exceeds it.
void
Call::MagicJoinWFBar(Wavefront *w)
{
    const VectorMask &mask = w->getPred();
    int highest = 0;

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (!mask[lane])
            continue;

        // one more work-item has joined the barrier on this lane
        if (++w->barCnt[lane] > highest)
            highest = w->barCnt[lane];
    }

    if (highest > w->maxBarCnt)
        w->maxBarCnt = highest;
}
void
Call::MagicWaitWFBar(Wavefront *w)
{
    // Leaves the wavefront barrier: active lanes decrement their
    // per-lane barrier count, and maxBarCnt is lowered to the new
    // wavefront-wide maximum.
    const VectorMask &mask = w->getPred();
    int max_cnt = 0;
    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            w->barCnt[lane]--;
        }
        // NOTE: the maximum is taken over *all* lanes, not just the
        // active ones, so lanes still waiting keep maxBarCnt raised.
        if (w->barCnt[lane] > max_cnt) {
            max_cnt = w->barCnt[lane];
        }
    }
    if (max_cnt < w->maxBarCnt) {
        w->maxBarCnt = max_cnt;
    }
    // Squash everything already fetched past this instruction and drop
    // any fetch in flight, forcing a refetch after the barrier resolves.
    w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
                               w->instructionBuffer.end());
    if (w->pendingFetch)
        w->dropFetch = true;
}
void
Call::MagicPanic(Wavefront *w)
{
    // Aborts the simulation on the first active lane, reporting the
    // assertion id the kernel passed in src1.
    const VectorMask &active = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (!active[lane])
            continue;

        int assert_id = src1.get<int>(w, lane, 1);
        panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
              assert_id, lane);
    }
}
void
Call::calcAddr(Wavefront *w, GPUDynInstPtr m)
{
    // the address is in src1 | src2
    // Per lane, assemble a 64-bit address from two 32-bit halves:
    // element 1 of the src1 operand supplies the high word and element 2
    // the low word.
    // NOTE(review): both halves are read through src1 (elements 1 and
    // 2); "src2" in the comment above presumably refers to element 2 --
    // confirm against the magic-call operand layout.
    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        int src_val1 = src1.get<int>(w, lane, 1);
        int src_val2 = src1.get<int>(w, lane, 2);
        Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2);
        m->addr[lane] = addr;
    }
}
void
Call::MagicAtomicNRAddGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
{
    // Issues a no-return atomic add of a per-lane u32 value to global
    // memory through the global memory pipeline.
    GPUDynInstPtr m = gpuDynInst;

    // per-lane addresses come from src1 elements 1 (hi) and 2 (lo)
    calcAddr(w, m);

    // the per-lane addend is src1 element 3
    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
    }

    setFlag(AtomicNoReturn);
    setFlag(AtomicAdd);
    setFlag(NoScope);
    setFlag(NoOrder);
    setFlag(GlobalSegment);

    m->m_type = U32::memType;
    m->v_type = U32::vgprType;

    m->exec_mask = w->execMask();
    m->statusBitVector = 0;
    m->equiv = 0;  // atomics don't have an equivalence class operand
    m->n_reg = 1;

    m->simdId = w->simdId;
    m->wfSlotId = w->wfSlotId;
    m->wfDynId = w->wfDynId;
    m->latency.init(&w->computeUnit->shader->tick_cnt);

    m->pipeId = GLBMEM_PIPE;
    m->latency.set(w->computeUnit->shader->ticks(64));
    w->computeUnit->globalMemoryPipe.issueRequest(m);
    // NOTE(review): both the read and write request counters are
    // adjusted here -- presumably atomics account as both a read and a
    // write in the pipeline bookkeeping; confirm against ComputeUnit.
    w->outstandingReqsWrGm++;
    w->wrGmReqsInPipe--;
    w->outstandingReqsRdGm++;
    w->rdGmReqsInPipe--;
    w->outstandingReqs++;
    w->memReqsInPipe--;
}
void
Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
{
    // Issues a no-return atomic add of a per-lane u32 value.
    // NOTE(review): despite the "Group" name, this sets GlobalSegment
    // and issues through the global memory pipe, exactly like the
    // Global variant above; also the addend is read from src1 element 1
    // rather than element 3 -- confirm both against the magic-call ABI.
    GPUDynInstPtr m = gpuDynInst;

    // per-lane addresses come from src1 elements 1 (hi) and 2 (lo)
    calcAddr(w, m);

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
    }

    setFlag(AtomicNoReturn);
    setFlag(AtomicAdd);
    setFlag(NoScope);
    setFlag(NoOrder);
    setFlag(GlobalSegment);

    m->m_type = U32::memType;
    m->v_type = U32::vgprType;

    m->exec_mask = w->execMask();
    m->statusBitVector = 0;
    m->equiv = 0;  // atomics don't have an equivalence class operand
    m->n_reg = 1;

    m->simdId = w->simdId;
    m->wfSlotId = w->wfSlotId;
    m->wfDynId = w->wfDynId;
    m->latency.init(&w->computeUnit->shader->tick_cnt);

    m->pipeId = GLBMEM_PIPE;
    m->latency.set(w->computeUnit->shader->ticks(64));
    w->computeUnit->globalMemoryPipe.issueRequest(m);
    w->outstandingReqsWrGm++;
    w->wrGmReqsInPipe--;
    w->outstandingReqsRdGm++;
    w->rdGmReqsInPipe--;
    w->outstandingReqs++;
    w->memReqsInPipe--;
}
void
Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
{
    // Issues a per-lane u32 load from global memory; the addresses are
    // taken from the src1 operand (elements 1 and 2) via calcAddr().
    GPUDynInstPtr m = gpuDynInst;

    // calculate the address
    calcAddr(w, m);

    setFlag(Load);
    setFlag(NoScope);
    setFlag(NoOrder);
    setFlag(GlobalSegment);

    m->m_type = U32::memType;  //MemDataType::memType;
    m->v_type = U32::vgprType; //DestDataType::vgprType;

    m->exec_mask = w->execMask();
    m->statusBitVector = 0;
    m->equiv = 0;
    m->n_reg = 1;
    // FIXME
    //m->dst_reg = this->dest.regIndex();

    m->simdId = w->simdId;
    m->wfSlotId = w->wfSlotId;
    m->wfDynId = w->wfDynId;
    m->latency.init(&w->computeUnit->shader->tick_cnt);

    m->pipeId = GLBMEM_PIPE;
    m->latency.set(w->computeUnit->shader->ticks(1));
    w->computeUnit->globalMemoryPipe.issueRequest(m);
    // loads only touch the read-side request accounting
    w->outstandingReqsRdGm++;
    w->rdGmReqsInPipe--;
    w->outstandingReqs++;
    w->memReqsInPipe--;
}
void
Call::MagicXactCasLd(Wavefront *w)
{
    // Registers this wavefront in the compute unit's transactional
    // CAS-load map, keyed by the src value of the first active lane.
    const VectorMask &mask = w->getPred();
    int src_val1 = 0;

    // take the key from the lowest-numbered active lane
    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            src_val1 = src1.get<int>(w, lane, 1);
            break;
        }
    }

    // create the per-key wave queue on first use
    if (!w->computeUnit->xactCasLoadMap.count(src_val1)) {
        w->computeUnit->xactCasLoadMap[src_val1] = ComputeUnit::waveQueue();
        w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue.clear();
    }

    w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue
        .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId));
}
void
Call::MagicMostSigThread(Wavefront *w)
{
    // Writes 1 to the most-significant (highest-numbered) active lane
    // and 0 to every other active lane.
    const VectorMask &active = w->getPred();
    unsigned is_most_sig = true;

    for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
        if (!active[lane])
            continue;

        dest.set<int>(w, lane, is_most_sig);
        is_most_sig = false;  // only the first active lane found gets a 1
    }
}
void
Call::MagicMostSigBroadcast(Wavefront *w)
{
    // Broadcasts the src value of the most-significant active lane to
    // every active lane.
    const VectorMask &active = w->getPred();
    int broadcast_val = 0;
    bool have_val = false;

    for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
        if (!active[lane])
            continue;

        if (!have_val) {
            broadcast_val = src1.get<int>(w, lane, 1);
            have_val = true;
        }
        dest.set<int>(w, lane, broadcast_val);
    }
}
} // namespace HsailISA

View File

@@ -1,468 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#include "arch/hsail/operand.hh"
using namespace Brig;
bool
BaseRegOperand::init(unsigned opOffset, const BrigObject *obj,
unsigned &maxRegIdx, char _regFileChar)
{
regFileChar = _regFileChar;
const BrigOperand *brigOp = obj->getOperand(opOffset);
if (brigOp->kind != BRIG_KIND_OPERAND_REGISTER)
return false;
const BrigOperandRegister *brigRegOp = (const BrigOperandRegister*)brigOp;
regIdx = brigRegOp->regNum;
DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d\n", regIdx,
brigRegOp->regKind);
maxRegIdx = std::max(maxRegIdx, regIdx);
return true;
}
void
ListOperand::init(unsigned opOffset, const BrigObject *obj)
{
    // Decode a BRIG code-list operand (the argument list of a call):
    // resolve each element to its ARG-segment storage element and keep
    // the resulting list in callArgs.
    const BrigOperand *brigOp = (const BrigOperand*)obj->getOperand(opOffset);

    switch (brigOp->kind) {
      case BRIG_KIND_OPERAND_CODE_LIST:
        {
            const BrigOperandCodeList *opList =
                (const BrigOperandCodeList*)brigOp;

            const Brig::BrigData *oprnd_data =
                obj->getBrigBaseData(opList->elements);

            // Note: for calls Dest list of operands could be size of 0.
            // each element offset is a 4-byte entry
            elementCount = oprnd_data->byteCount / 4;

            DPRINTF(GPUReg, "Operand Code List: # elements: %d\n",
                    elementCount);

            for (int i = 0; i < elementCount; ++i) {
                // elements + 4 skips the byteCount header; each entry is
                // an offset into the code section
                unsigned *data_offset =
                    (unsigned*)obj->getData(opList->elements + 4 * (i + 1));

                const BrigDirectiveVariable *p =
                    (const BrigDirectiveVariable*)obj->
                    getCodeSectionEntry(*data_offset);

                // resolve the variable directive to its ARG-segment
                // storage element; it must already be in the storage map
                StorageElement *se = obj->currentCode->storageMap->
                    findSymbol(BRIG_SEGMENT_ARG, p);

                assert(se);
                callArgs.push_back(se);
            }
        }
        break;
      default:
        fatal("ListOperand: bad operand kind %d\n", brigOp->kind);
    }
}
std::string
ListOperand::disassemble()
{
    // Render each call argument's symbol name, space separated.
    std::string out;

    for (auto arg : callArgs)
        out += csprintf("%s ", arg->name.c_str());

    return out;
}
void
FunctionRefOperand::init(unsigned opOffset, const BrigObject *obj)
{
const BrigOperand *baseOp = obj->getOperand(opOffset);
if (baseOp->kind != BRIG_KIND_OPERAND_CODE_REF) {
fatal("FunctionRefOperand: bad operand kind %d\n", baseOp->kind);
}
const BrigOperandCodeRef *brigOp = (const BrigOperandCodeRef*)baseOp;
const BrigDirectiveExecutable *p =
(const BrigDirectiveExecutable*)obj->getCodeSectionEntry(brigOp->ref);
func_name = obj->getString(p->name);
}
std::string
FunctionRefOperand::disassemble()
{
    // A function reference disassembles to just the function's name.
    DPRINTF(GPUReg, "Operand Func-ref name: %s\n", func_name);

    return csprintf("%s", func_name);
}
bool
BaseRegOperand::init_from_vect(unsigned opOffset, const BrigObject *obj,
                               int at, unsigned &maxRegIdx, char _regFileChar)
{
    // Decode element 'at' of a BRIG operand list as a register operand.
    // Returns false if the operand at opOffset is not an operand list or
    // the selected element is not a register.
    regFileChar = _regFileChar;

    const BrigOperand *brigOp = obj->getOperand(opOffset);

    if (brigOp->kind != BRIG_KIND_OPERAND_OPERAND_LIST)
        return false;


    const Brig::BrigOperandOperandList *brigRegVecOp =
         (const Brig::BrigOperandOperandList*)brigOp;

    // elements + 4 skips the byteCount header; each entry is a 4-byte
    // offset into the operand section
    unsigned *data_offset =
        (unsigned*)obj->getData(brigRegVecOp->elements + 4 * (at + 1));

    const BrigOperand *p =
        (const BrigOperand*)obj->getOperand(*data_offset);
    if (p->kind != BRIG_KIND_OPERAND_REGISTER) {
        return false;
    }

    const BrigOperandRegister *brigRegOp =(const BrigOperandRegister*)p;

    regIdx = brigRegOp->regNum;

    DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d \n", regIdx,
            brigRegOp->regKind);

    maxRegIdx = std::max(maxRegIdx, regIdx);

    return true;
}
void
BaseRegOperand::initWithStrOffset(unsigned strOffset, const BrigObject *obj,
unsigned &maxRegIdx, char _regFileChar)
{
const char *name = obj->getString(strOffset);
char *endptr;
regIdx = strtoul(name + 2, &endptr, 10);
if (name[0] != '$' || name[1] != _regFileChar) {
fatal("register operand parse error on \"%s\"\n", name);
}
maxRegIdx = std::max(maxRegIdx, regIdx);
}
// Per-register-file high-water marks: the largest register index seen
// while decoding operands of each file ($s, $d, $c).
unsigned SRegOperand::maxRegIdx;
unsigned DRegOperand::maxRegIdx;
unsigned CRegOperand::maxRegIdx;
std::string
SRegOperand::disassemble()
{
    // 32-bit (single) register: "$s<idx>"
    return csprintf("$s%d", regIdx);
}
std::string
DRegOperand::disassemble()
{
    // 64-bit (double) register: "$d<idx>"
    return csprintf("$d%d", regIdx);
}
std::string
CRegOperand::disassemble()
{
    // condition register: "$c<idx>"
    return csprintf("$c%d", regIdx);
}
// Inspect the BRIG operand at opOffset and report its kind together
// with the register kind (or data type, for register-less addresses)
// needed to pick a register class for it. Aborts via fatal() on
// operand kinds it does not understand.
BrigRegOperandInfo
findRegDataType(unsigned opOffset, const BrigObject *obj)
{
    const BrigOperand *baseOp = obj->getOperand(opOffset);

    switch (baseOp->kind) {
      case BRIG_KIND_OPERAND_REGISTER:
        {
            const BrigOperandRegister *op = (BrigOperandRegister*)baseOp;

            return BrigRegOperandInfo((BrigKind16_t)baseOp->kind,
                                      (BrigRegisterKind)op->regKind);
        }
        break;

      case BRIG_KIND_OPERAND_WAVESIZE:
        {
            // wavesize is a 64-bit immediate; treat it like a double reg
            BrigRegisterKind reg_kind = BRIG_REGISTER_KIND_DOUBLE;
            return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, reg_kind);
        }

      case BRIG_KIND_OPERAND_OPERAND_LIST:
        {
             // scan every element of the list; the register kind of the
             // last register/constant element seen wins
             const BrigOperandOperandList *op =
                (BrigOperandOperandList*)baseOp;
             const BrigData *data_p = (BrigData*)obj->getData(op->elements);


             int num_operands = 0;
             BrigRegisterKind reg_kind = (BrigRegisterKind)0;
             for (int offset = 0; offset < data_p->byteCount; offset += 4) {
                 const BrigOperand *op_p = (const BrigOperand *)
                    obj->getOperand(((int *)data_p->bytes)[offset/4]);

                 if (op_p->kind == BRIG_KIND_OPERAND_REGISTER) {
                     const BrigOperandRegister *brigRegOp =
                        (const BrigOperandRegister*)op_p;
                     reg_kind = (BrigRegisterKind)brigRegOp->regKind;
                 } else if (op_p->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) {
                     // infer single vs. double from the constant's size
                     uint16_t num_bytes =
                        ((Brig::BrigOperandConstantBytes*)op_p)->base.byteCount
                            - sizeof(BrigBase);
                     if (num_bytes == sizeof(uint32_t)) {
                         reg_kind = BRIG_REGISTER_KIND_SINGLE;
                     } else if (num_bytes == sizeof(uint64_t)) {
                         reg_kind = BRIG_REGISTER_KIND_DOUBLE;
                     } else {
                         fatal("OperandList: bad operand size %d\n", num_bytes);
                     }
                 } else if (op_p->kind == BRIG_KIND_OPERAND_WAVESIZE) {
                     reg_kind = BRIG_REGISTER_KIND_DOUBLE;
                 } else {
                     fatal("OperandList: bad operand kind %d\n", op_p->kind);
                 }

                 num_operands++;
             }
             assert(baseOp->kind == BRIG_KIND_OPERAND_OPERAND_LIST);

             return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, reg_kind);
        }
        break;

      case BRIG_KIND_OPERAND_ADDRESS:
        {
            const BrigOperandAddress *op = (BrigOperandAddress*)baseOp;

            if (!op->reg) {
                // register-less address: report the symbol's data type
                // (BRIG_TYPE_NONE when there is no symbol either)
                BrigType type = BRIG_TYPE_NONE;

                if (op->symbol) {
                    const BrigDirective *dir = (BrigDirective*)
                        obj->getCodeSectionEntry(op->symbol);

                    assert(dir->kind == BRIG_KIND_DIRECTIVE_VARIABLE);

                    const BrigDirectiveVariable *sym =
                       (const BrigDirectiveVariable*)dir;

                    type = (BrigType)sym->type;
                }
                return BrigRegOperandInfo(BRIG_KIND_OPERAND_ADDRESS,
                                          (BrigType)type);
            } else {
                // register-relative address: report the base register's
                // kind instead
                const BrigOperandAddress *b = (const BrigOperandAddress*)baseOp;
                const BrigOperand *reg = obj->getOperand(b->reg);
                const BrigOperandRegister *rop = (BrigOperandRegister*)reg;

                return BrigRegOperandInfo(BRIG_KIND_OPERAND_REGISTER,
                                          (BrigRegisterKind)rop->regKind);
            }
        }
        break;

     default:
       fatal("AddrOperand: bad operand kind %d\n", baseOp->kind);
       break;
   }
}
// Resolve the symbol part of a BRIG address operand: look the variable
// directive up in the current code's storage map and remember the
// resulting StorageElement (and its name). ARG-segment symbols are
// looked up by directive pointer rather than by name -- see the long
// note below.
void
AddrOperandBase::parseAddr(const BrigOperandAddress *op, const BrigObject *obj)
{
    assert(op->base.kind == BRIG_KIND_OPERAND_ADDRESS);
    const BrigDirective *d =
        (BrigDirective*)obj->getCodeSectionEntry(op->symbol);

    /**
     * HSAIL does not properly handle immediate offsets for instruction types
     * that utilize them. It currently only supports instructions that use
     * variables instead. Again, these pop up in code that is never executed
     * (i.e. the HCC AMP codes) so we just hack it here to let us pass through
     * the HSAIL object initialization. If such code is ever called, we would
     * have to implement this properly.
     */
    if (d->kind != BRIG_KIND_DIRECTIVE_VARIABLE) {
        warn("HSAIL implementation does not support instructions with "
             "address calculations where the operand is not a variable\n");
    }

    const BrigDirectiveVariable *sym = (BrigDirectiveVariable*)d;
    name = obj->getString(sym->name);

    if (sym->segment != BRIG_SEGMENT_ARG) {
        storageElement =
            obj->currentCode->storageMap->findSymbol(sym->segment, name);
        offset = 0;
    } else {
        // sym->name does not work for BRIG_SEGMENT_ARG for the following case:
        //
        //     void foo(int a);
        //     void bar(double a);
        //
        //     foo(...) --> arg_u32 %param_p0;
        //                  st_arg_u32 $s0, [%param_p0];
        //                  call &foo (%param_p0);
        //     bar(...) --> arg_f64 %param_p0;
        //                  st_arg_u64 $d0, [%param_p0];
        //                  call &foo (%param_p0);
        //
        //  Both functions use the same variable name (param_p0)!!!
        //
        //  Maybe this is a bug in the compiler (I don't know).
        //
        // Solution:
        // Use directive pointer (BrigDirectiveVariable) to differentiate 2
        // versions of param_p0.
        //
        // Note this solution is kind of stupid, because we are pulling stuff
        // out of the brig binary via the directive pointer and putting it into
        // the symbol table, but now we are indexing the symbol table by the
        // brig directive pointer! It makes the symbol table sort of pointless.
        // But I don't want to mess with the rest of the infrastructure, so
        // let's go with this for now.
        //
        // When we update the compiler again, we should see if this problem goes
        // away. If so, we can fold some of this functionality into the code for
        // kernel arguments. If not, maybe we can index the symbol name on a
        // hash of the variable AND function name
        storageElement = obj->currentCode->
            storageMap->findSymbol((Brig::BrigSegment)sym->segment, sym);

        assert(storageElement);
    }
}
uint64_t
AddrOperandBase::calcUniformBase()
{
    // Base address = immediate offset (0 when unspecified) plus the
    // bound symbol's storage offset, if any.
    uint64_t base = offset;

    if (storageElement)
        base += storageElement->offset;

    return base;
}
std::string
AddrOperandBase::disassemble(std::string reg_disassembly)
{
    // Formats the address expression as "[reg+offset]", "[reg]",
    // "[offset]", or "[name]" depending on which pieces are present.
    std::string disasm;

    if (!reg_disassembly.empty() || offset) {
        disasm = "[";

        if (!reg_disassembly.empty()) {
            disasm += reg_disassembly;

            if (offset > 0)
                disasm += "+";
        }

        if (offset)
            disasm += csprintf("%d", offset);

        disasm += "]";
    } else if (name) {
        disasm = csprintf("[%s]", name);
    }

    return disasm;
}
void
NoRegAddrOperand::init(unsigned opOffset, const BrigObject *obj)
{
const BrigOperand *baseOp = obj->getOperand(opOffset);
if (baseOp->kind == BRIG_KIND_OPERAND_ADDRESS) {
BrigOperandAddress *addrOp = (BrigOperandAddress*)baseOp;
parseAddr(addrOp, obj);
offset = (uint64_t(addrOp->offset.hi) << 32) |
uint64_t(addrOp->offset.lo);
} else {
fatal("NoRegAddrOperand: bad operand kind %d\n", baseOp->kind);
}
}
std::string
NoRegAddrOperand::disassemble()
{
    // no register part, so delegate with an empty register string
    return AddrOperandBase::disassemble("");
}
void
LabelOperand::init(unsigned opOffset, const BrigObject *obj)
{
    // Decode a code-ref operand pointing at a label directive and
    // register (or look up) the corresponding Label object.
    const BrigOperandCodeRef *op =
        (const BrigOperandCodeRef*)obj->getOperand(opOffset);

    assert(op->base.kind == BRIG_KIND_OPERAND_CODE_REF);

    const BrigDirective *dir =
        (const BrigDirective*)obj->getCodeSectionEntry(op->ref);

    assert(dir->kind == BRIG_KIND_DIRECTIVE_LABEL);
    label = obj->currentCode->refLabel((BrigDirectiveLabel*)dir, obj);
}
uint32_t
LabelOperand::getTarget(Wavefront *w, int lane)
{
    // Label targets are uniform across the wavefront, so both arguments
    // are unused; returns the label's resolved instruction index.
    return label->get();
}
std::string
LabelOperand::disassemble()
{
    // a label operand disassembles to just the label's name
    return label->name;
}

View File

@@ -1,796 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#ifndef __ARCH_HSAIL_OPERAND_HH__
#define __ARCH_HSAIL_OPERAND_HH__
/**
* @file operand.hh
*
* Defines classes encapsulating HSAIL instruction operands.
*/
#include <limits>
#include <string>
#include "arch/hsail/Brig.h"
#include "base/trace.hh"
#include "base/types.hh"
#include "debug/GPUReg.hh"
#include "enums/RegisterType.hh"
#include "gpu-compute/brig_object.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/hsail_code.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/vector_register_file.hh"
#include "gpu-compute/wavefront.hh"
class Label;
class StorageElement;
// Common base for all HSAIL instruction operands: records which
// register class (if any) the operand occupies and its size in bytes.
// The register-type predicates and regIndex() are overridden/shadowed
// by the concrete operand classes below.
class BaseOperand
{
  public:
    Enums::RegisterType registerType;
    uint32_t regOperandSize;
    BaseOperand() { registerType = Enums::RT_NONE; regOperandSize = 0; }
    bool isVectorRegister() { return registerType == Enums::RT_VECTOR; }
    bool isScalarRegister() { return registerType == Enums::RT_SCALAR; }
    bool isCondRegister() { return registerType == Enums::RT_CONDITION; }
    // non-register operands report index 0
    unsigned int regIndex() { return 0; }
    uint32_t opSize() { return regOperandSize; }
    virtual ~BaseOperand() { }
};
class BrigRegOperandInfo
{
public:
Brig::BrigKind16_t kind;
Brig::BrigType type;
Brig::BrigRegisterKind regKind;
BrigRegOperandInfo(Brig::BrigKind16_t _kind,
Brig::BrigRegisterKind _regKind)
: kind(_kind), regKind(_regKind)
{
}
BrigRegOperandInfo(Brig::BrigKind16_t _kind, Brig::BrigType _type)
: kind(_kind), type(_type)
{
}
BrigRegOperandInfo() : kind(Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES),
type(Brig::BRIG_TYPE_NONE)
{
}
};
BrigRegOperandInfo findRegDataType(unsigned opOffset, const BrigObject *obj);
// Base for the concrete register operand classes ($s/$d/$c): holds the
// decoded register index and the register-file character, and provides
// the three BRIG decoding entry points (direct operand, element of a
// vector operand, and register-name string).
class BaseRegOperand : public BaseOperand
{
  public:
    unsigned regIdx;
    char regFileChar;

    bool init(unsigned opOffset, const BrigObject *obj,
              unsigned &maxRegIdx, char _regFileChar);

    bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at,
                        unsigned &maxRegIdx, char _regFileChar);

    void initWithStrOffset(unsigned strOffset, const BrigObject *obj,
                           unsigned &maxRegIdx, char _regFileChar);
    unsigned int regIndex() { return regIdx; }
};
// 32-bit ($s) vector register operand. Reads and writes go through the
// SIMD unit's vector register file after remapping the HSAIL register
// index to a physical VGPR.
class SRegOperand : public BaseRegOperand
{
  public:
    // largest $s index seen while decoding (used for register sizing)
    static unsigned maxRegIdx;

    bool
    init(unsigned opOffset, const BrigObject *obj)
    {
        regOperandSize = sizeof(uint32_t);
        registerType = Enums::RT_VECTOR;

        return BaseRegOperand::init(opOffset, obj, maxRegIdx, 's');
    }

    bool
    init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
    {
        regOperandSize = sizeof(uint32_t);
        registerType = Enums::RT_VECTOR;

        return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
                                              's');
    }

    void
    initWithStrOffset(unsigned strOffset, const BrigObject *obj)
    {
        regOperandSize = sizeof(uint32_t);
        registerType = Enums::RT_VECTOR;

        return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
                                                 's');
    }

    // Read this register for one lane, truncating to OperandType's
    // width when it is narrower than 32 bits.
    template<typename OperandType>
    OperandType
    get(Wavefront *w, int lane)
    {
        assert(sizeof(OperandType) <= sizeof(uint32_t));
        assert(regIdx < w->maxSpVgprs);
        // if OperandType is smaller than 32-bit, we truncate the value
        OperandType ret;
        uint32_t vgprIdx;

        switch (sizeof(OperandType)) {
          case 1: // 1 byte operand
            vgprIdx = w->remap(regIdx, 1, 1);
            ret = (w->computeUnit->vrf[w->simdId]->
                    read<uint32_t>(vgprIdx, lane)) & 0xff;
            break;
          case 2: // 2 byte operand
            vgprIdx = w->remap(regIdx, 2, 1);
            ret = (w->computeUnit->vrf[w->simdId]->
                    read<uint32_t>(vgprIdx, lane)) & 0xffff;
            break;
          case 4: // 4 byte operand
            vgprIdx = w->remap(regIdx,sizeof(OperandType), 1);
            ret = w->computeUnit->vrf[w->simdId]->
                read<OperandType>(vgprIdx, lane);
            break;
          default:
            panic("Bad OperandType\n");
            break;
        }

        return (OperandType)ret;
    }

    // special get method for compatibility with LabelOperand
    uint32_t
    getTarget(Wavefront *w, int lane)
    {
        return get<uint32_t>(w, lane);
    }

    template<typename OperandType>
    void set(Wavefront *w, int lane, OperandType &val);
    std::string disassemble();
};
// Write a 32-bit value to this $s register for one lane; only exactly
// 32-bit OperandTypes are accepted here (see the uint64_t
// specialization below).
template<typename OperandType>
void
SRegOperand::set(Wavefront *w, int lane, OperandType &val)
{
    DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n",
            w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val);

    assert(sizeof(OperandType) == sizeof(uint32_t));
    assert(regIdx < w->maxSpVgprs);
    uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
    w->computeUnit->vrf[w->simdId]->write<OperandType>(vgprIdx,val,lane);
}
// uint64_t specialization: writes the value into a 32-bit $s register.
// NOTE(review): the write is performed as uint32_t, so the upper 32
// bits of val are presumably discarded -- confirm callers only pass
// values that fit in 32 bits.
template<>
inline void
SRegOperand::set(Wavefront *w, int lane, uint64_t &val)
{
    DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n",
            w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val);

    assert(regIdx < w->maxSpVgprs);
    uint32_t vgprIdx = w->remap(regIdx, sizeof(uint32_t), 1);
    w->computeUnit->vrf[w->simdId]->write<uint32_t>(vgprIdx, val, lane);
}
// 64-bit ($d) vector register operand; reads and writes go through the
// SIMD unit's vector register file after index remapping.
class DRegOperand : public BaseRegOperand
{
  public:
    // largest $d index seen while decoding (used for register sizing)
    static unsigned maxRegIdx;

    bool
    init(unsigned opOffset, const BrigObject *obj)
    {
        regOperandSize = sizeof(uint64_t);
        registerType = Enums::RT_VECTOR;

        return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'd');
    }

    bool
    init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
    {
        regOperandSize = sizeof(uint64_t);
        registerType = Enums::RT_VECTOR;

        return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
                                              'd');
    }

    void
    initWithStrOffset(unsigned strOffset, const BrigObject *obj)
    {
        regOperandSize = sizeof(uint64_t);
        registerType = Enums::RT_VECTOR;

        return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
                                                 'd');
    }

    // read this register for one lane as OperandType (<= 64 bits)
    template<typename OperandType>
    OperandType
    get(Wavefront *w, int lane)
    {
        assert(sizeof(OperandType) <= sizeof(uint64_t));
        // TODO: this check is valid only for HSAIL
        assert(regIdx < w->maxDpVgprs);
        uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);

        return w->computeUnit->vrf[w->simdId]->read<OperandType>(vgprIdx,lane);
    }

    // write this register for one lane
    template<typename OperandType>
    void
    set(Wavefront *w, int lane, OperandType &val)
    {
        DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $d%d <- %d\n",
                w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx,
                val);

        assert(sizeof(OperandType) <= sizeof(uint64_t));
        // TODO: this check is valid only for HSAIL
        assert(regIdx < w->maxDpVgprs);
        uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
        w->computeUnit->vrf[w->simdId]->write<OperandType>(vgprIdx,val,lane);
    }

    std::string disassemble();
};
// Condition ($c) register operand; reads and writes go through the
// wavefront's condition register state rather than the VRF.
class CRegOperand : public BaseRegOperand
{
  public:
    // largest $c index seen while decoding (used for register sizing)
    static unsigned maxRegIdx;

    bool
    init(unsigned opOffset, const BrigObject *obj)
    {
        regOperandSize = sizeof(uint8_t);
        registerType = Enums::RT_CONDITION;

        return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'c');
    }

    bool
    init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
    {
        regOperandSize = sizeof(uint8_t);
        registerType = Enums::RT_CONDITION;

        return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
                                              'c');
    }

    void
    initWithStrOffset(unsigned strOffset, const BrigObject *obj)
    {
        regOperandSize = sizeof(uint8_t);
        registerType = Enums::RT_CONDITION;

        return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
                                                 'c');
    }

    // read this condition register for one lane
    template<typename OperandType>
    OperandType
    get(Wavefront *w, int lane)
    {
        assert(regIdx < w->condRegState->numRegs());

        return w->condRegState->read<OperandType>((int)regIdx, lane);
    }

    // write this condition register for one lane
    template<typename OperandType>
    void
    set(Wavefront *w, int lane, OperandType &val)
    {
        DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $c%d <- %d\n",
                w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx,
                val);

        assert(regIdx < w->condRegState->numRegs());
        w->condRegState->write<OperandType>(regIdx,lane,val);
    }

    std::string disassemble();
};
// Immediate operand holding a constant of type T decoded from the BRIG
// binary. The WAVESIZE pseudo-immediate is resolved lazily: get()
// substitutes the compute unit's wavefront size at read time.
template<typename T>
class ImmOperand : public BaseOperand
{
  private:
    // BRIG operand kind (constant bytes vs. wavesize vs. none)
    uint16_t kind;
  public:
    // decoded constant value (unused for WAVESIZE -- see get())
    T bits;

    bool init(unsigned opOffset, const BrigObject *obj);
    bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at);
    std::string disassemble();

    template<typename OperandType>
    OperandType
    get(Wavefront *w)
    {
        assert(sizeof(OperandType) <= sizeof(T));
        panic_if(w == nullptr, "WF pointer needs to be set");

        switch (kind) {
          // immediate operand is WF size
          case Brig::BRIG_KIND_OPERAND_WAVESIZE:
            return (OperandType)w->computeUnit->wfSize();
            break;

          default:
            return *(OperandType*)&bits;
            break;
        }
    }

    // This version of get() takes a WF* and a lane id for
    // compatibility with the register-based get() methods.
    template<typename OperandType>
    OperandType
    get(Wavefront *w, int lane)
    {
        return get<OperandType>(w);
    }
};
// Decode an immediate operand: either raw constant bytes or the
// WAVESIZE pseudo-immediate. Returns false (kind = NONE) for any other
// operand kind so callers can fall back to register decoding.
template<typename T>
bool
ImmOperand<T>::init(unsigned opOffset, const BrigObject *obj)
{
    const Brig::BrigOperand *brigOp = obj->getOperand(opOffset);

    switch (brigOp->kind) {
      // this is immediate operand
      case Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES:
        {
            DPRINTF(GPUReg, "sizeof(T): %lu, byteCount: %d\n", sizeof(T),
                    brigOp->byteCount);

            // bytes + 4 skips the byteCount header of the data entry
            auto cbptr = (Brig::BrigOperandConstantBytes*)brigOp;

            bits = *((T*)(obj->getData(cbptr->bytes + 4)));

            kind = brigOp->kind;
            return true;
        }
        break;

      case Brig::BRIG_KIND_OPERAND_WAVESIZE:
        kind = brigOp->kind;
        // placeholder only: get() substitutes the real wavefront size
        // for WAVESIZE operands at read time
        bits = std::numeric_limits<unsigned long long>::digits;
        return true;

      default:
        kind = Brig::BRIG_KIND_NONE;
        return false;
    }
}
// Decode element 'at' of a BRIG operand list as an immediate. Returns
// false (kind = NONE) if the operand is not a list or the selected
// element is not constant bytes.
template <typename T>
bool
ImmOperand<T>::init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
{
    const Brig::BrigOperand *brigOp = obj->getOperand(opOffset);

    if (brigOp->kind != Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
        kind = Brig::BRIG_KIND_NONE;
        return false;
    }


    const Brig::BrigOperandOperandList *brigVecOp =
        (const Brig::BrigOperandOperandList *)brigOp;

    // elements + 4 skips the byteCount header; each entry is a 4-byte
    // offset into the operand section
    unsigned *data_offset =
        (unsigned *)obj->getData(brigVecOp->elements + 4 * (at + 1));

    const Brig::BrigOperand *p =
        (const Brig::BrigOperand *)obj->getOperand(*data_offset);
    if (p->kind != Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
        kind = Brig::BRIG_KIND_NONE;
        return false;
    }

    return init(*data_offset, obj);
}
// Render the immediate as hex.
// NOTE(review): the "%08x" format presumably truncates the printout for
// 64-bit T -- display only, but confirm if exact disassembly matters.
template<typename T>
std::string
ImmOperand<T>::disassemble()
{
    return csprintf("0x%08x", bits);
}
// An operand that may be either a register (RegOperand) or an immediate
// of type T; decoding tries the register form first. All accessors
// dispatch on is_imm, and the register-introspection queries report
// "not a register" for immediates.
template<typename RegOperand, typename T>
class RegOrImmOperand : public BaseOperand
{
  private:
    bool is_imm;

  public:
    void setImm(const bool value) { is_imm = value; }

    ImmOperand<T> imm_op;
    RegOperand reg_op;

    RegOrImmOperand() { is_imm = false; }
    void init(unsigned opOffset, const BrigObject *obj);
    void init_from_vect(unsigned opOffset, const BrigObject *obj, int at);
    std::string disassemble();

    template<typename OperandType>
    OperandType
    get(Wavefront *w, int lane)
    {
        return is_imm ?  imm_op.template get<OperandType>(w) :
                         reg_op.template get<OperandType>(w, lane);
    }

    // immediates have no register footprint, so report size 0
    uint32_t
    opSize()
    {
        if (!is_imm) {
            return reg_op.opSize();
        }

        return 0;
    }

    bool
    isVectorRegister()
    {
        if (!is_imm) {
            return reg_op.registerType == Enums::RT_VECTOR;
        }
        return false;
    }

    bool
    isCondRegister()
    {
        if (!is_imm) {
            return reg_op.registerType == Enums::RT_CONDITION;
        }

        return false;
    }

    bool
    isScalarRegister()
    {
        if (!is_imm) {
            return reg_op.registerType == Enums::RT_SCALAR;
        }

        return false;
    }

    unsigned int
    regIndex()
    {
        if (!is_imm) {
            return reg_op.regIndex();
        }
        return 0;
    }
};
template<typename RegOperand, typename T>
void
RegOrImmOperand<RegOperand, T>::init(unsigned opOffset, const BrigObject *obj)
{
    // Try to decode as a register first, then as an immediate; abort
    // when the operand is neither.
    is_imm = false;

    if (reg_op.init(opOffset, obj))
        return;

    if (imm_op.init(opOffset, obj)) {
        is_imm = true;
        return;
    }

    fatal("RegOrImmOperand::init(): bad operand kind %d\n",
          obj->getOperand(opOffset)->kind);
}
// Decode element 'at' of a vector operand as a register or, failing
// that, as an immediate; aborts when it is neither.
template<typename RegOperand, typename T>
void
RegOrImmOperand<RegOperand, T>::init_from_vect(unsigned opOffset,
                                               const BrigObject *obj, int at)
{
    if (reg_op.init_from_vect(opOffset, obj, at)) {
        is_imm = false;

        return;
    }

    if (imm_op.init_from_vect(opOffset, obj, at)) {
        is_imm = true;

        return;
    }

    // bug fix: the original message said "init()", misattributing
    // decode failures to the wrong function when debugging
    fatal("RegOrImmOperand::init_from_vect(): bad operand kind %d\n",
          obj->getOperand(opOffset)->kind);
}
template<typename RegOperand, typename T>
std::string
RegOrImmOperand<RegOperand, T>::disassemble()
{
    // delegate to whichever alternative this operand decoded as
    if (is_imm)
        return imm_op.disassemble();

    return reg_op.disassemble();
}
typedef RegOrImmOperand<SRegOperand, uint32_t> SRegOrImmOperand;
typedef RegOrImmOperand<DRegOperand, uint64_t> DRegOrImmOperand;
typedef RegOrImmOperand<CRegOperand, bool> CRegOrImmOperand;
// Base for address operands: an optional symbol (storageElement/name)
// plus an immediate offset. Subclasses supply per-lane address
// computation (calcVector/calcLane) for register-relative forms.
class AddrOperandBase : public BaseOperand
{
  protected:
    // helper function for init()
    void parseAddr(const Brig::BrigOperandAddress *op, const BrigObject *obj);

    // helper function for disassemble()
    std::string disassemble(std::string reg_disassembly);
    // symbol offset + immediate offset (lane-invariant part)
    uint64_t calcUniformBase();

  public:
    virtual void calcVector(Wavefront *w, std::vector<Addr> &addrVec) = 0;
    virtual uint64_t calcLane(Wavefront *w, int lane=0) = 0;

    // signed: BRIG address offsets may be negative
    int64_t offset;
    const char *name = nullptr;
    StorageElement *storageElement;
};
// Register-relative address operand: per-lane address = uniform base
// (symbol + immediate offset) + the lane's value in register 'reg'.
template<typename RegOperandType>
class RegAddrOperand : public AddrOperandBase
{
  public:
    RegOperandType reg;
    void init(unsigned opOffset, const BrigObject *obj);
    // always fatal: register-relative addresses are per-lane
    uint64_t calcUniform();
    void calcVector(Wavefront *w, std::vector<Addr> &addrVec);
    uint64_t calcLane(Wavefront *w, int lane=0);
    uint32_t opSize() { return reg.opSize(); }
    bool isVectorRegister() { return reg.registerType == Enums::RT_VECTOR; }
    bool isCondRegister() { return reg.registerType == Enums::RT_CONDITION; }
    bool isScalarRegister() { return reg.registerType == Enums::RT_SCALAR; }
    unsigned int regIndex() { return reg.regIndex(); }
    std::string disassemble();
};
// Decode a register-relative BRIG address operand: the base register
// plus an immediate offset whose width and signedness follow the
// register file ($s = 32-bit, $d = 64-bit).
template<typename RegOperandType>
void
RegAddrOperand<RegOperandType>::init(unsigned opOffset, const BrigObject *obj)
{
    using namespace Brig;

    const BrigOperand *baseOp = obj->getOperand(opOffset);

    switch (baseOp->kind) {
      case BRIG_KIND_OPERAND_ADDRESS:
        {
            const BrigOperandAddress *op = (BrigOperandAddress*)baseOp;
            storageElement = nullptr;

            reg.init(op->reg, obj);

            if (reg.regFileChar == 's') {
                // if the address expression is 32b, then the hi
                // bits of the offset must be set to 0 in the BRIG
                assert(!op->offset.hi);

                /**
                 * the offset field of an HSAIL instruction may be negative
                 * so here we cast the raw bits we get from the BRIG file to
                 * a signed type to avoid address calculation errors
                 */

                offset = (int32_t)(op->offset.lo);
                reg.regOperandSize = sizeof(uint32_t);
                registerType = Enums::RT_VECTOR;
            }
            else if (reg.regFileChar == 'd') {
                offset = (int64_t)(((uint64_t)(op->offset.hi) << 32)
                    | (uint64_t)(op->offset.lo));
                reg.regOperandSize = sizeof(uint64_t);
                registerType = Enums::RT_VECTOR;
            }
        }
        break;

      default:
        fatal("RegAddrOperand: bad operand kind %d\n", baseOp->kind);
        break;
    }
}
// A register-based address differs per lane, so no single "uniform"
// address exists; reaching this is a programming error.
template<typename RegOperandType>
uint64_t
RegAddrOperand<RegOperandType>::calcUniform()
{
    fatal("can't do calcUniform() on register-based address\n");

    // unreachable; keeps the signature's return requirement satisfied
    return 0;
}
/**
 * Fill addrVec with one address per active lane: the uniform base plus
 * this lane's register value. Inactive lanes are left untouched.
 */
template<typename RegOperandType>
void
RegAddrOperand<RegOperandType>::calcVector(Wavefront *w,
                                           std::vector<Addr> &addrVec)
{
    const Addr base = calcUniformBase();
    // 's' registers hold 32-bit address components; anything else is
    // read at full Addr width
    const bool narrow_reg = (reg.regFileChar == 's');

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (!w->execMask(lane))
            continue;

        if (narrow_reg) {
            addrVec[lane] = base + reg.template get<uint32_t>(w, lane);
        } else {
            addrVec[lane] = base + reg.template get<Addr>(w, lane);
        }
    }
}
// Per-lane address = uniform base + the lane's register value.
template<typename RegOperandType>
uint64_t
RegAddrOperand<RegOperandType>::calcLane(Wavefront *w, int lane)
{
    const Addr base = calcUniformBase();
    const Addr lane_part = reg.template get<Addr>(w, lane);

    return base + lane_part;
}
// Render via the shared base-class helper, which wraps the register's
// own disassembly with the symbol/offset text.
template<typename RegOperandType>
std::string
RegAddrOperand<RegOperandType>::disassemble()
{
    std::string reg_str = reg.disassemble();

    return AddrOperandBase::disassemble(reg_str);
}

using SRegAddrOperand = RegAddrOperand<SRegOperand>;
using DRegAddrOperand = RegAddrOperand<DRegOperand>;
// Address operand with no register component: the address is fully
// determined by the symbol/offset, so it is identical for every lane.
class NoRegAddrOperand : public AddrOperandBase
{
  public:
    void init(unsigned opOffset, const BrigObject *obj);
    uint64_t calcUniform();
    void calcVector(Wavefront *w, std::vector<Addr> &addrVec);
    uint64_t calcLane(Wavefront *w, int lane=0);
    std::string disassemble();
};
// With no register component the address is just the uniform base.
inline uint64_t
NoRegAddrOperand::calcUniform()
{
    return AddrOperandBase::calcUniformBase();
}
// Every lane sees the same address; the wavefront and lane are unused.
inline uint64_t
NoRegAddrOperand::calcLane(Wavefront *w, int lane)
{
    return calcUniform();
}
// Broadcast the single uniform address into every lane's slot.
inline void
NoRegAddrOperand::calcVector(Wavefront *w, std::vector<Addr> &addrVec)
{
    const uint64_t addr = calcUniformBase();
    const int lanes = w->computeUnit->wfSize();

    for (int lane = 0; lane < lanes; ++lane) {
        addrVec[lane] = addr;
    }
}
// Operand referring to a branch-target label; the label is resolved to a
// per-lane target via getTarget().
class LabelOperand : public BaseOperand
{
  public:
    Label *label;

    void init(unsigned opOffset, const BrigObject *obj);
    std::string disassemble();

    // special get method for compatibility with SRegOperand
    uint32_t getTarget(Wavefront *w, int lane);
};
// Operand for a list of call arguments. Each element is a StorageElement
// in call-argument memory; get()/set() read and write that memory on a
// per-lane basis.
class ListOperand : public BaseOperand
{
  public:
    int elementCount;
    std::vector<StorageElement*> callArgs;

    // byte offset in call-arg memory of the idx-th argument
    int
    getSrcOperand(int idx)
    {
        DPRINTF(GPUReg, "getSrcOperand, idx: %d, sz_args: %d\n", idx,
                callArgs.size());

        return callArgs.at(idx)->offset;
    }

    void init(unsigned opOffset, const BrigObject *obj);
    std::string disassemble();

    // read argument arg_idx for the given lane from call-arg memory
    template<typename OperandType>
    OperandType
    get(Wavefront *w, int lane, int arg_idx)
    {
        return w->readCallArgMem<OperandType>(lane, getSrcOperand(arg_idx));
    }

    // write the first argument slot for the given lane
    template<typename OperandType>
    void
    set(Wavefront *w, int lane, OperandType val)
    {
        w->writeCallArgMem<OperandType>(lane, getSrcOperand(0), val);

        DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: arg[%d] <- %d\n",
                w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane,
                getSrcOperand(0), val);
    }
};
// Operand naming a called function; only the function's name is recorded.
class FunctionRefOperand : public BaseOperand
{
  public:
    const char *func_name;

    void init(unsigned opOffset, const BrigObject *obj);
    std::string disassemble();
};
#endif // __ARCH_HSAIL_OPERAND_HH__

View File

@@ -1,476 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt, Anthony Gutierrez
*/
#include "gpu-compute/brig_object.hh"
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include "arch/hsail/Brig.h"
#include "base/logging.hh"
#include "base/trace.hh"
#include "debug/BRIG.hh"
#include "debug/HSAILObject.hh"
#include "debug/HSALoader.hh"
using namespace Brig;

// Loader probe registry: BRIG is the only object format registered here,
// so BrigObject::tryFile is the sole entry.
std::vector<std::function<HsaObject*(const std::string&, int, uint8_t*)>>
HsaObject::tryFileFuncs = { BrigObject::tryFile };

// defined in another translation unit (HSAIL code handling)
extern int getBrigDataTypeBytes(BrigType16_t t);
// Section names in BRIG file order; the first three presumably line up
// with enum SectionIndex (Data, Code, Operands) — the trailing entry is
// the ELF-style section-name string table.
const char *BrigObject::sectionNames[] =
{
    "hsa_data",
    "hsa_code",
    "hsa_operand",
    ".shstrtab"
};

// printable names for the BRIG memory segments
const char *segmentNames[] =
{
    "none",
    "flat",
    "global",
    "readonly",
    "kernarg",
    "group",
    "private",
    "spill",
    "args"
};
// Pointer into section sec at byte offset offs.
const uint8_t*
BrigObject::getSectionOffset(enum SectionIndex sec, int offs) const
{
    // allow offs == size for dummy end pointers
    assert(offs <= sectionInfo[sec].size);

    const uint8_t *section_base = sectionInfo[sec].ptr;
    return section_base + offs;
}
// String entry in the data section; skip the 4-byte length field that
// precedes the character data.
const char*
BrigObject::getString(int offs) const
{
    const uint8_t *entry = getSectionOffset(DataSectionIndex, offs);

    return (const char*)(entry + 4);
}
// Code-section entry header at byte offset offs.
const BrigBase*
BrigObject::getCodeSectionEntry(int offs) const
{
    const uint8_t *raw = getSectionOffset(CodeSectionIndex, offs);

    return reinterpret_cast<const BrigBase*>(raw);
}
// BrigData entry (length-prefixed blob) in the data section.
const BrigData*
BrigObject::getBrigBaseData(int offs) const
{
    const uint8_t *raw = getSectionOffset(DataSectionIndex, offs);

    return reinterpret_cast<const Brig::BrigData*>(raw);
}
// Raw pointer into the data section at byte offset offs (no length-field
// skipping — see getString() for string entries).
const uint8_t*
BrigObject::getData(int offs) const
{
    return getSectionOffset(DataSectionIndex, offs);
}
// Operand entry in the operands section at byte offset offs.
const BrigOperand*
BrigObject::getOperand(int offs) const
{
    const uint8_t *raw = getSectionOffset(OperandsSectionIndex, offs);

    return reinterpret_cast<const BrigOperand*>(raw);
}
// Fetch the index-th operand offset from an operand-list blob: the blob
// is a BrigData entry, so the first 4 bytes are the byte count, followed
// by 4-byte operand offsets — hence the (index + 1) word skip.
unsigned
BrigObject::getOperandPtr(int offs, int index) const
{
    const uint8_t *raw = getData(offs + 4 * (index + 1));

    return *reinterpret_cast<const unsigned*>(raw);
}
// Instruction entry in the code section at byte offset offs.
const BrigInstBase*
BrigObject::getInst(int offs) const
{
    const uint8_t *raw = getSectionOffset(CodeSectionIndex, offs);

    return reinterpret_cast<const BrigInstBase*>(raw);
}
/**
 * Look up a kernel by name. Mirrors getFunction(): scans the kernels
 * vector and returns the first match, or nullptr when no kernel has that
 * name. Previously this always returned nullptr, leaving name-based
 * kernel lookup unimplemented while the symmetric getFunction() worked.
 */
HsaCode*
BrigObject::getKernel(const std::string &name) const
{
    for (int i = 0; i < kernels.size(); ++i) {
        if (kernels[i]->name() == name) {
            return kernels[i];
        }
    }

    return nullptr;
}
// Look up a function by name; linear scan of the (small) function list.
HsaCode*
BrigObject::getFunction(const std::string &name) const
{
    for (auto *func : functions) {
        if (func->name() == name) {
            return func;
        }
    }

    return nullptr;
}
/**
 * Walk the code-section entries between dirPtr and endPtr, creating
 * HsailCode objects for kernel/function directives and registering
 * variables in storageMap. Instruction and operand entries are skipped
 * here; kernel/function bodies are consumed when their HsailCode is
 * built, which is why those cases advance nextDirPtr past the body.
 */
void
BrigObject::processDirectives(const BrigBase *dirPtr, const BrigBase *endPtr,
                              StorageMap *storageMap)
{
    while (dirPtr < endPtr) {
        if (!dirPtr->byteCount) {
            fatal("Bad directive size 0\n");
        }

        // calculate next pointer now so we can override it if needed
        const BrigBase *nextDirPtr = brigNext(dirPtr);

        DPRINTF(HSAILObject, "Code section entry kind: #%x, byte count: %d\n",
                dirPtr->kind, dirPtr->byteCount);

        switch (dirPtr->kind) {
          case BRIG_KIND_DIRECTIVE_FUNCTION:
            {
                const BrigDirectiveExecutable *p M5_VAR_USED =
                    reinterpret_cast<const BrigDirectiveExecutable*>(dirPtr);

                DPRINTF(HSAILObject,"DIRECTIVE_FUNCTION: %s offset: "
                        "%d next: %d\n", getString(p->name),
                        p->firstCodeBlockEntry, p->nextModuleEntry);

                // a function with a body (entry != next module entry)
                if (p->firstCodeBlockEntry != p->nextModuleEntry) {
                    // Function calls are not supported. We allow the BRIG
                    // object file to create stubs, but the function calls will
                    // not work properly if the application makes use of them.
                    warn("HSA function invocations are unsupported.\n");

                    const char *name = getString(p->name);

                    HsailCode *code_obj = nullptr;

                    // reuse an existing entry if this function was seen before
                    for (int i = 0; i < functions.size(); ++i) {
                        if (functions[i]->name() == name) {
                            code_obj = functions[i];
                            break;
                        }
                    }

                    if (!code_obj) {
                        // create new local storage map for kernel-local symbols
                        code_obj = new HsailCode(name, p, this,
                                                 new StorageMap(storageMap));
                        functions.push_back(code_obj);
                    } else {
                        panic("Multiple definition of Function!!: %s\n",
                              getString(p->name));
                    }
                }
                // skip the function body; it was consumed above
                nextDirPtr = getCodeSectionEntry(p->nextModuleEntry);
            }
            break;

          case BRIG_KIND_DIRECTIVE_KERNEL:
            {
                const BrigDirectiveExecutable *p =
                    reinterpret_cast<const BrigDirectiveExecutable*>(dirPtr);

                DPRINTF(HSAILObject,"DIRECTIVE_KERNEL: %s offset: %d count: "
                        "next: %d\n", getString(p->name),
                        p->firstCodeBlockEntry, p->nextModuleEntry);

                const char *name = getString(p->name);

                // strip any leading '&' from the kernel name
                if (name[0] == '&')
                    name++;

                // NOTE(review): this trims the final character when it is
                // not a lowercase letter — presumably stripping a mangling
                // suffix; confirm against the BRIG producer's naming
                std::string str = name;
                char *temp;
                int len = str.length();

                if (str[len - 1] >= 'a' && str[len - 1] <= 'z') {
                    temp = new char[str.size() + 1];
                    std::copy(str.begin(), str.end() , temp);
                    temp[str.size()] = '\0';
                } else {
                    temp = new char[str.size()];
                    std::copy(str.begin(), str.end() - 1 , temp);
                    temp[str.size() - 1 ] = '\0';
                }

                std::string kernel_name = temp;
                delete[] temp;

                HsailCode *code_obj = nullptr;

                // reuse an existing entry if this kernel was seen before
                for (const auto &kernel : kernels) {
                    if (kernel->name() == kernel_name) {
                        code_obj = kernel;
                        break;
                    }
                }

                if (!code_obj) {
                    // create new local storage map for kernel-local symbols
                    code_obj = new HsailCode(kernel_name, p, this,
                                             new StorageMap(storageMap));

                    kernels.push_back(code_obj);
                }

                // skip the kernel body; it was consumed above
                nextDirPtr = getCodeSectionEntry(p->nextModuleEntry);
            }
            break;

          case BRIG_KIND_DIRECTIVE_VARIABLE:
            {
                const BrigDirectiveVariable *p =
                    reinterpret_cast<const BrigDirectiveVariable*>(dirPtr);

                uint64_t readonlySize_old =
                    storageMap->getSize(BRIG_SEGMENT_READONLY);

                StorageElement* se = storageMap->addSymbol(p, this);

                DPRINTF(HSAILObject, "DIRECTIVE_VARIABLE, symbol %s\n",
                        getString(p->name));

                if (p->segment == BRIG_SEGMENT_READONLY) {
                    // readonly memory has initialization data
                    uint8_t* readonlyData_old = readonlyData;

                    // grow the readonly buffer to the new total size
                    readonlyData =
                        new uint8_t[storageMap->getSize(BRIG_SEGMENT_READONLY)];

                    if (p->init) {
                        if ((p->type == BRIG_TYPE_ROIMG) ||
                            (p->type == BRIG_TYPE_WOIMG) ||
                            (p->type == BRIG_TYPE_SAMP) ||
                            (p->type == BRIG_TYPE_SIG32) ||
                            (p->type == BRIG_TYPE_SIG64)) {
                            panic("Read only data type not supported: %s\n",
                                  getString(p->name));
                        }

                        const BrigOperand *brigOp = getOperand(p->init);
                        assert(brigOp->kind ==
                               BRIG_KIND_OPERAND_CONSTANT_BYTES);

                        const Brig::BrigData *operand_data M5_VAR_USED =
                            getBrigBaseData(((BrigOperandConstantBytes*)
                                            brigOp)->bytes);

                        assert((operand_data->byteCount / 4) > 0);

                        uint8_t *symbol_data =
                            (uint8_t*)getData(((BrigOperandConstantBytes*)
                                              brigOp)->bytes + 4);

                        // copy the old data and add the new data
                        if (readonlySize_old > 0) {
                            memcpy(readonlyData, readonlyData_old,
                                   readonlySize_old);
                        }

                        memcpy(readonlyData + se->offset, symbol_data,
                               se->size);

                        delete[] readonlyData_old;
                    }
                }
            }
            break;

          case BRIG_KIND_DIRECTIVE_LABEL:
            {
                const BrigDirectiveLabel M5_VAR_USED *p =
                    reinterpret_cast<const BrigDirectiveLabel*>(dirPtr);

                panic("Label directives cannot be at the module level: %s\n",
                      getString(p->name));
            }
            break;

          case BRIG_KIND_DIRECTIVE_COMMENT:
            {
                const BrigDirectiveComment M5_VAR_USED *p =
                    reinterpret_cast<const BrigDirectiveComment*>(dirPtr);

                DPRINTF(HSAILObject, "DIRECTIVE_COMMENT: %s\n",
                        getString(p->name));
            }
            break;

          case BRIG_KIND_DIRECTIVE_LOC:
            {
                DPRINTF(HSAILObject, "BRIG_DIRECTIVE_LOC\n");
            }
            break;

          case BRIG_KIND_DIRECTIVE_MODULE:
            {
                const BrigDirectiveModule M5_VAR_USED *p =
                    reinterpret_cast<const BrigDirectiveModule*>(dirPtr);

                DPRINTF(HSAILObject, "BRIG_DIRECTIVE_MODULE: %s\n",
                        getString(p->name));
            }
            break;

          case BRIG_KIND_DIRECTIVE_CONTROL:
            {
                DPRINTF(HSAILObject, "DIRECTIVE_CONTROL\n");
            }
            break;

          case BRIG_KIND_DIRECTIVE_PRAGMA:
            {
                DPRINTF(HSAILObject, "DIRECTIVE_PRAGMA\n");
            }
            break;

          case BRIG_KIND_DIRECTIVE_EXTENSION:
            {
                DPRINTF(HSAILObject, "DIRECTIVE_EXTENSION\n");
            }
            break;

          case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
            {
                DPRINTF(HSAILObject, "DIRECTIVE_ARG_BLOCK_START\n");
            }
            break;

          case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
            {
                DPRINTF(HSAILObject, "DIRECTIVE_ARG_BLOCK_END\n");
            }
            break;

          default:
            // instruction and operand entries are legal here but handled
            // elsewhere; silently step over them
            if (dirPtr->kind >= BRIG_KIND_INST_BEGIN &&
                dirPtr->kind <= BRIG_KIND_INST_END)
                break;

            if (dirPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
                dirPtr->kind <= BRIG_KIND_OPERAND_END)
                break;

            warn("Unknown Brig directive kind: %d\n", dirPtr->kind);
            break;
        }

        dirPtr = nextDirPtr;
    }
}
/**
 * Loader probe: construct a BrigObject when fileData starts with the
 * BRIG magic string, otherwise return nullptr so another loader can try.
 */
HsaObject*
BrigObject::tryFile(const std::string &fname, int len, uint8_t *fileData)
{
    const char *brig_ident = "HSA BRIG";
    const bool is_brig =
        memcmp(brig_ident, fileData, MODULE_IDENTIFICATION_LENGTH) == 0;

    return is_brig ? new BrigObject(fname, len, fileData) : nullptr;
}
/**
 * Load a BRIG module from an in-memory image: validate the magic string,
 * version, and section count, copy each section into simulator-owned
 * buffers, then walk the code section to discover kernels, functions,
 * and variables. Takes ownership of fileData and frees it before
 * returning.
 */
BrigObject::BrigObject(const std::string &fname, int len, uint8_t *fileData)
    : HsaObject(fname), storageMap(new StorageMap())
{
    const char *brig_ident = "HSA BRIG";
    BrigModuleHeader *mod_hdr = (BrigModuleHeader*)fileData;

    fatal_if(memcmp(brig_ident, mod_hdr, MODULE_IDENTIFICATION_LENGTH),
             "%s is not a BRIG file\n", fname);

    if (mod_hdr->brigMajor != BRIG_VERSION_BRIG_MAJOR ||
        mod_hdr->brigMinor != BRIG_VERSION_BRIG_MINOR) {
        fatal("%s: BRIG version mismatch, %d.%d != %d.%d\n",
              fname, mod_hdr->brigMajor, mod_hdr->brigMinor,
              BRIG_VERSION_BRIG_MAJOR, BRIG_VERSION_BRIG_MINOR);
    }

    fatal_if(mod_hdr->sectionCount != NumSectionIndices, "%s: BRIG section "
             "count (%d) != expected value (%d)\n", fname,
             mod_hdr->sectionCount, NumSectionIndices);

    for (int i = 0; i < NumSectionIndices; ++i) {
        sectionInfo[i].ptr = nullptr;
    }

    // the section index table holds a byte offset to each section header
    uint64_t *sec_idx_table = (uint64_t*)(fileData + mod_hdr->sectionIndex);
    for (int sec_idx = 0; sec_idx < mod_hdr->sectionCount; ++sec_idx) {
        uint8_t *sec_hdr_byte_ptr = fileData + sec_idx_table[sec_idx];
        BrigSectionHeader *sec_hdr = (BrigSectionHeader*)sec_hdr_byte_ptr;

        // It doesn't look like cprintf supports string precision values,
        // but if this breaks, the right answer is to fix that
        DPRINTF(HSAILObject, "found section %.*s\n", sec_hdr->nameLength,
                sec_hdr->name);

        // keep a private copy of each section (header included), since
        // fileData is freed below
        sectionInfo[sec_idx].ptr = new uint8_t[sec_hdr->byteCount];
        memcpy(sectionInfo[sec_idx].ptr, sec_hdr_byte_ptr, sec_hdr->byteCount);
        sectionInfo[sec_idx].size = sec_hdr->byteCount;
    }

    BrigSectionHeader *code_hdr =
        (BrigSectionHeader*)sectionInfo[CodeSectionIndex].ptr;

    DPRINTF(HSAILObject, "Code section hdr, count: %d, hdr count: %d, "
            "name len: %d\n", code_hdr->byteCount, code_hdr->headerByteCount,
            code_hdr->nameLength);

    // start at offset 4 to skip initial null entry (see Brig spec)
    processDirectives(getCodeSectionEntry(code_hdr->headerByteCount),
                      getCodeSectionEntry(sectionInfo[CodeSectionIndex].size),
                      storageMap);

    delete[] fileData;

    DPRINTF(HSALoader, "BRIG object %s loaded.\n", fname);
}
// Release the private copies made of each BRIG section.
BrigObject::~BrigObject()
{
    for (int i = 0; i < NumSectionIndices; ++i) {
        // delete[] of a null pointer is a no-op, so no guard is needed
        delete[] sectionInfo[i].ptr;
    }
}

View File

@@ -1,134 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt, Anthony Gutierrez
*/
#ifndef __BRIG_OBJECT_HH__
#define __BRIG_OBJECT_HH__
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>
#include "arch/hsail/Brig.h"
#include "gpu-compute/hsa_object.hh"
#include "gpu-compute/hsail_code.hh"
class LabelMap;
class StorageMap;
/* @class BrigObject
* this class implements the BRIG loader object, and
* is used when the simulator directly executes HSAIL.
* this class is responsible for extracting all
* information about kernels contained in BRIG format
* and converts them to HsailCode objects that are
* usable by the simulator and emulated runtime.
*/
class BrigObject final : public HsaObject
{
  public:
    // order matches the section layout in the BRIG file
    enum SectionIndex
    {
        DataSectionIndex,
        CodeSectionIndex,
        OperandsSectionIndex,
        NumSectionIndices
    };

    static const char *sectionNames[];

    // simulator-owned copy of one BRIG section
    struct SectionInfo
    {
        uint8_t *ptr;
        int size;
    };

    // loader probe: returns a BrigObject if fileData is BRIG, else nullptr
    static HsaObject* tryFile(const std::string &fname, int len,
                              uint8_t *fileData);

    SectionInfo sectionInfo[NumSectionIndices];
    const uint8_t *getSectionOffset(enum SectionIndex sec, int offs) const;

    std::vector<HsailCode*> kernels;
    std::vector<HsailCode*> functions;
    std::string kern_block_name;

    void processDirectives(const Brig::BrigBase *dirPtr,
                           const Brig::BrigBase *endPtr,
                           StorageMap *storageMap);

    BrigObject(const std::string &fname, int len, uint8_t *fileData);
    ~BrigObject();

    // eventually these will need to be per-kernel not per-object-file
    StorageMap *storageMap;
    LabelMap *labelMap;

    // accessors into the data/code/operands sections by byte offset
    const char* getString(int offs) const;
    const Brig::BrigData* getBrigBaseData(int offs) const;
    const uint8_t* getData(int offs) const;
    const Brig::BrigBase* getCodeSectionEntry(int offs) const;
    const Brig::BrigOperand* getOperand(int offs) const;
    unsigned getOperandPtr(int offs, int index) const;
    const Brig::BrigInstBase* getInst(int offs) const;

    HsaCode* getKernel(const std::string &name) const override;
    HsaCode* getFunction(const std::string &name) const override;

    int numKernels() const override { return kernels.size(); }

    HsaCode* getKernel(int i) const override { return kernels[i]; }

    // pointer to the current kernel/function we're processing, so elements
    // under construction can reference it. kinda ugly, but easier
    // than passing it all over for the few places it's needed.
    mutable HsailCode *currentCode;
};
// Utility function to bump a Brig item pointer to the next element using
// the byteCount stored in the item's BrigBase header. Really just an add,
// but with lots of casting.
template<typename T>
T*
brigNext(T *ptr)
{
    auto *base_ptr = (Brig::BrigBase*)ptr;
    const int item_bytes = base_ptr->byteCount;
    assert(item_bytes);

    uint8_t *next = (uint8_t*)ptr + item_bytes;
    return (T*)next;
}
#endif // __BRIG_OBJECT_HH__

View File

@@ -1,279 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#include "gpu-compute/cl_driver.hh"
#include <memory>
#include "base/intmath.hh"
#include "cpu/thread_context.hh"
#include "gpu-compute/dispatcher.hh"
#include "gpu-compute/hsa_code.hh"
#include "gpu-compute/hsa_kernel_info.hh"
#include "gpu-compute/hsa_object.hh"
#include "params/ClDriver.hh"
#include "sim/process.hh"
#include "sim/syscall_emul_buf.hh"
/**
 * Load every code file named in the params, collect all kernels they
 * define, and precompute the per-kernel name/code table offsets that the
 * emulated HSA ioctl interface hands to the runtime.
 */
ClDriver::ClDriver(ClDriverParams *p)
    : EmulatedDriver(p), hsaCode(0)
{
    for (const auto &codeFile : p->codefile)
        codeFiles.push_back(&codeFile);

    maxFuncArgsSize = 0;

    for (int i = 0; i < codeFiles.size(); ++i) {
        HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]);

        for (int k = 0; k < obj->numKernels(); ++k) {
            assert(obj->getKernel(k));
            kernels.push_back(obj->getKernel(k));
            kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData);
            // track the largest function-argument footprint of any kernel
            int kern_funcargs_size = kernels.back()->funcarg_size;
            maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ?
                kern_funcargs_size : maxFuncArgsSize;
        }
    }

    // names and code are laid out back-to-back; record each kernel's
    // starting offset in both tables
    int name_offs = 0;
    int code_offs = 0;

    for (int i = 0; i < kernels.size(); ++i) {
        kernelInfo.push_back(HsaKernelInfo());
        HsaCode *k = kernels[i];

        k->generateHsaKernelInfo(&kernelInfo[i]);

        kernelInfo[i].name_offs = name_offs;
        kernelInfo[i].code_offs = code_offs;

        // +1 for the terminating '\0' of each name
        name_offs += k->name().size() + 1;
        code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
    }
}
void
ClDriver::handshake(GpuDispatcher *_dispatcher)
{
dispatcher = _dispatcher;
dispatcher->setFuncargsSize(maxFuncArgsSize);
}
// Allocate a target file descriptor bound to this emulated device.
int
ClDriver::open(ThreadContext *tc, int mode, int flags)
{
    auto process = tc->getProcessPtr();
    auto fd_entry = std::make_shared<DeviceFDEntry>(this, filename);

    return process->fds->allocFD(fd_entry);
}
/**
 * Emulated-driver ioctl interface used by the runtime to query kernel
 * metadata and copy code/data into the guest. Each HSA_* request fills a
 * buffer at buf_addr and copies it out through the thread's virtual
 * proxy. Returns 0 on every handled request; unknown requests are fatal.
 */
int
ClDriver::ioctl(ThreadContext *tc, unsigned req, Addr buf_addr)
{
    switch (req) {
      case HSA_GET_SIZES:
        {
            // report table sizes so the runtime can size later requests
            TypedBufferArg<HsaDriverSizes> sizes(buf_addr);
            sizes->num_kernels = kernels.size();
            sizes->string_table_size = 0;
            sizes->code_size = 0;
            sizes->readonly_size = 0;

            if (kernels.size() > 0) {
                // all kernels will share the same read-only memory
                sizes->readonly_size =
                    kernels[0]->getSize(HsaCode::MemorySegment::READONLY);
                // check our assumption
                for (int i = 1; i<kernels.size(); ++i) {
                    assert(sizes->readonly_size ==
                    kernels[i]->getSize(HsaCode::MemorySegment::READONLY));
                }
            }

            for (int i = 0; i < kernels.size(); ++i) {
                HsaCode *k = kernels[i];
                // add one for terminating '\0'
                sizes->string_table_size += k->name().size() + 1;
                sizes->code_size +=
                    k->numInsts() * sizeof(TheGpuISA::RawMachInst);
            }

            sizes.copyOut(tc->getVirtProxy());
        }
        break;

      case HSA_GET_KINFO:
        {
            // per-kernel register counts, memory sizes, and table offsets
            TypedBufferArg<HsaKernelInfo>
                kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size());

            for (int i = 0; i < kernels.size(); ++i) {
                HsaKernelInfo *ki = &kinfo[i];
                ki->name_offs = kernelInfo[i].name_offs;
                ki->code_offs = kernelInfo[i].code_offs;
                ki->sRegCount = kernelInfo[i].sRegCount;
                ki->dRegCount = kernelInfo[i].dRegCount;
                ki->cRegCount = kernelInfo[i].cRegCount;
                ki->static_lds_size = kernelInfo[i].static_lds_size;
                ki->private_mem_size = kernelInfo[i].private_mem_size;
                ki->spill_mem_size = kernelInfo[i].spill_mem_size;
            }

            kinfo.copyOut(tc->getVirtProxy());
        }
        break;

      case HSA_GET_STRINGS:
        {
            // concatenated NUL-terminated kernel names
            int string_table_size = 0;
            for (int i = 0; i < kernels.size(); ++i) {
                HsaCode *k = kernels[i];
                string_table_size += k->name().size() + 1;
            }

            BufferArg buf(buf_addr, string_table_size);
            char *bufp = (char*)buf.bufferPtr();

            for (int i = 0; i < kernels.size(); ++i) {
                HsaCode *k = kernels[i];
                const char *n = k->name().c_str();

                // idiomatic string copy
                while ((*bufp++ = *n++));
            }

            assert(bufp - (char *)buf.bufferPtr() == string_table_size);

            buf.copyOut(tc->getVirtProxy());
        }
        break;

      case HSA_GET_READONLY_DATA:
        {
            // we can pick any kernel --- they share the same
            // readonly segment (this assumption is checked in GET_SIZES)
            uint64_t size =
                kernels.back()->getSize(HsaCode::MemorySegment::READONLY);
            BufferArg data(buf_addr, size);
            char *datap = (char *)data.bufferPtr();
            memcpy(datap,
                   kernels.back()->readonly_data,
                   size);
            data.copyOut(tc->getVirtProxy());
        }
        break;

      case HSA_GET_CODE:
        {
            // set hsaCode pointer (remembered for codeOffToKernelName())
            hsaCode = buf_addr;
            int code_size = 0;

            for (int i = 0; i < kernels.size(); ++i) {
                HsaCode *k = kernels[i];
                code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
            }

            TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size);
            TheGpuISA::RawMachInst *bufp = buf;

            int buf_idx = 0;

            // copy every kernel's instructions back-to-back
            for (int i = 0; i < kernels.size(); ++i) {
                HsaCode *k = kernels[i];

                for (int j = 0; j < k->numInsts(); ++j) {
                    bufp[buf_idx] = k->insts()->at(j);
                    ++buf_idx;
                }
            }

            buf.copyOut(tc->getVirtProxy());
        }
        break;

      case HSA_GET_CU_CNT:
        {
            // number of compute units
            BufferArg buf(buf_addr, sizeof(uint32_t));
            *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs();
            buf.copyOut(tc->getVirtProxy());
        }
        break;

      case HSA_GET_VSZ:
        {
            // wavefront (vector) size
            BufferArg buf(buf_addr, sizeof(uint32_t));
            *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize();
            buf.copyOut(tc->getVirtProxy());
        }
        break;

      case HSA_GET_HW_STATIC_CONTEXT_SIZE:
        {
            BufferArg buf(buf_addr, sizeof(uint32_t));
            *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize();
            buf.copyOut(tc->getVirtProxy());
        }
        break;

      default:
        fatal("ClDriver: bad ioctl %d\n", req);
    }

    return 0;
}
/**
 * Map a guest code pointer back to the owning kernel's name using the
 * offsets recorded at construction. Requires HSA_GET_CODE to have set
 * hsaCode first; returns nullptr if no kernel starts at that offset.
 */
const char*
ClDriver::codeOffToKernelName(uint64_t code_ptr)
{
    assert(hsaCode);

    const uint32_t code_offs = code_ptr - hsaCode;

    for (int i = 0; i < kernels.size(); ++i) {
        if (kernelInfo[i].code_offs == code_offs)
            return kernels[i]->name().c_str();
    }

    return nullptr;
}
// Python-config factory hook: instantiate the driver from its params.
ClDriver*
ClDriverParams::create()
{
    return new ClDriver(this);
}

View File

@@ -1,77 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#ifndef __CL_DRIVER_HH__
#define __CL_DRIVER_HH__
#include <vector>
#include "gpu-compute/hsa_kernel_info.hh"
#include "sim/emul_driver.hh"
class GpuDispatcher;
class HsaCode;
class Process;
class ThreadContext;
struct ClDriverParams;
// Emulated OpenCL driver: loads kernel objects from code files and serves
// their metadata/code to the guest runtime through ioctl requests.
class ClDriver final : public EmulatedDriver
{
  public:
    ClDriver(ClDriverParams *p);
    // bind to the dispatcher and publish the max funcargs size
    void handshake(GpuDispatcher *_dispatcher);
    int open(ThreadContext *tc, int mode, int flags);
    int ioctl(ThreadContext *tc, unsigned req, Addr buf);
    const char* codeOffToKernelName(uint64_t code_ptr);

  private:
    GpuDispatcher *dispatcher;

    std::vector<const std::string*> codeFiles;

    // All the kernels we know about
    std::vector<HsaCode*> kernels;
    std::vector<HsaCode*> functions;

    // per-kernel metadata handed out through ioctl
    std::vector<HsaKernelInfo> kernelInfo;

    // maximum size necessary for function arguments
    int maxFuncArgsSize;
    // The host virtual address for the kernel code
    uint64_t hsaCode;
};
#endif // __CL_DRIVER_HH__

View File

@@ -1,49 +0,0 @@
/*
* Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __GPU_CL_EVENT_HH__
#define __GPU_CL_EVENT_HH__
struct HsaQueueEntry;

// Minimal OpenCL event record: a completion flag, the HSA task being
// tracked, and start/end timestamps.
class _cl_event {
  public:
    _cl_event() = default;

    volatile bool done = false;
    HsaQueueEntry *hsaTaskPtr = nullptr;
    uint64_t start = 0;
    uint64_t end = 0;
};
#endif // __GPU_CL_EVENT_HH__

View File

@@ -1,83 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: John Kalamatianos
*/
#include "gpu-compute/condition_register_state.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"
// Start with no owning CU and empty register/busy vectors; init() and
// setParent() complete the setup later.
ConditionRegisterState::ConditionRegisterState()
    : computeUnit(nullptr)
{
    c_reg.clear();
    busy.clear();
}
void
ConditionRegisterState::setParent(ComputeUnit *_computeUnit)
{
    computeUnit = _computeUnit;
    // name this object after its owning CU for stats/debug output
    _name = _computeUnit->name() + ".CondRegState";
}
// Size the condition register file: one entry per register, with every
// register initially marked not busy.
void
ConditionRegisterState::init(uint32_t _size)
{
    busy.resize(_size, 0);
    c_reg.resize(_size);
}
// Mark every condition-register destination of this instruction busy and
// schedule the event that will mark it ready again after the SP bypass
// pipeline delay.
void
ConditionRegisterState::exec(GPUDynInstPtr ii, Wavefront *w)
{
    // visit each operand of the instruction
    for (int opIdx = 0; opIdx < ii->getNumOperands(); ++opIdx) {
        // skip anything that is not a condition-register destination
        if (!ii->isCondRegister(opIdx) || !ii->isDstOperand(opIdx)) {
            continue;
        }
        // flag the destination register busy until its result is ready
        markReg(ii->getRegisterIndex(opIdx, ii), 1);
        uint32_t pipeLen = w->computeUnit->spBypassLength();
        // schedule the ready event one SP bypass pipeline later
        w->computeUnit->registerEvent(
            w->simdId, ii->getRegisterIndex(opIdx, ii),
            ii->getOperandSize(opIdx),
            w->computeUnit->shader->tick_cnt +
                w->computeUnit->shader->ticks(pipeLen), 0);
    }
}

View File

@@ -1,101 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: John Kalamatianos
*/
#ifndef __CONDITION_REGISTER_STATE_HH__
#define __CONDITION_REGISTER_STATE_HH__
#include <string>
#include <vector>
#include "gpu-compute/misc.hh"
class ComputeUnit;
class GPUStaticInst;
class Shader;
class Wavefront;
// Condition Register State (used only when executing HSAIL)
class ConditionRegisterState
{
  public:
    ConditionRegisterState();

    // size the register file and busy flags to _size registers
    void init(uint32_t _size);
    const std::string name() const { return _name; }
    void setParent(ComputeUnit *_computeUnit);
    void regStats() { }

    // Read one lane's condition bit, widened to T by value.
    // (The previous implementation cast the address of a one-byte bool
    // to a T* and dereferenced it, which reads past the object --
    // undefined behavior -- for any T wider than one byte.)
    template<typename T>
    T
    read(int regIdx, int threadId)
    {
        bool tmp = c_reg[regIdx][threadId];
        return static_cast<T>(tmp);
    }

    // Write one lane's condition bit; only bit 0 of value is kept.
    template<typename T>
    void
    write(int regIdx, int threadId, T value)
    {
        c_reg[regIdx][threadId] = (bool)(value & 0x01);
    }

    // set (value != 0) or clear the busy flag for a register
    void
    markReg(int regIdx, uint8_t value)
    {
        busy.at(regIdx) = value;
    }

    uint8_t
    regBusy(int idx)
    {
        uint8_t status = busy.at(idx);
        return status;
    }

    int numRegs() { return c_reg.size(); }
    // mark condition-register destinations of ii busy and schedule
    // their ready events (defined in condition_register_state.cc)
    void exec(GPUDynInstPtr ii, Wavefront *w);

  private:
    ComputeUnit* computeUnit;
    std::string _name;
    // Condition Register state, one lane mask per register
    std::vector<VectorMask> c_reg;
    // flag indicating if a register is busy
    std::vector<uint8_t> busy;
};
#endif

View File

@@ -1,101 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#ifndef __HSA_CODE_HH__
#define __HSA_CODE_HH__
#include <string>
#include <vector>
#include "arch/gpu_types.hh"
#include "config/the_gpu_isa.hh"
class HsaKernelInfo;
/* @class HsaCode
* base code object for the set of HSA kernels associated
* with a single application. this class provides the common
* methods for creating, accessing, and storing information
* about kernel and variable symbols, symbol name, memory
* segment sizes, and instruction count, etc.
*/
class HsaCode
{
  public:
    HsaCode(const std::string &name) : readonly_data(nullptr), funcarg_size(0),
                                       _name(name)
    {
    }

    // This class is used polymorphically (it declares pure virtual
    // methods), so it needs a virtual destructor: without one, deleting
    // a derived object through an HsaCode* is undefined behavior.
    virtual ~HsaCode() { }

    // memory segments a kernel may allocate storage in
    enum class MemorySegment {
        NONE,
        FLAT,
        GLOBAL,
        READONLY,
        KERNARG,
        GROUP,
        PRIVATE,
        SPILL,
        ARG,
        EXTSPACE0
    };

    const std::string& name() const { return _name; }
    int numInsts() const { return _insts.size(); }
    std::vector<TheGpuISA::RawMachInst>* insts() { return &_insts; }

    // record the (non-owned) pointer to this code object's readonly data
    void
    setReadonlyData(uint8_t *_readonly_data)
    {
        readonly_data = _readonly_data;
    }

    // size in bytes of the given memory segment for this code object
    virtual int getSize(MemorySegment segment) const = 0;
    // fill in the driver-visible kernel descriptor for this code object
    virtual void generateHsaKernelInfo(HsaKernelInfo *hsaKernelInfo) const = 0;

    uint8_t *readonly_data;
    int funcarg_size;

  protected:
    // An array that stores instruction indices (0 through kernel size)
    // for a kernel passed to code object constructor as an argument.
    std::vector<TheGpuISA::RawMachInst> _insts;

  private:
    const std::string _name;
};
#endif // __HSA_CODE_HH__

View File

@@ -1,80 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#ifndef __HSA_KERNEL_INFO_HH__
#define __HSA_KERNEL_INFO_HH__
// This file defines the public interface between the HSA emulated
// driver and application programs.
#include <cstdint>
// ioctl-style command codes understood by the emulated HSA driver.
static const int HSA_GET_SIZES = 0x4801;
static const int HSA_GET_KINFO = 0x4802;
static const int HSA_GET_STRINGS = 0x4803;
static const int HSA_GET_CODE = 0x4804;
static const int HSA_GET_READONLY_DATA = 0x4805;
static const int HSA_GET_CU_CNT = 0x4806;
static const int HSA_GET_VSZ = 0x4807;
static const int HSA_GET_HW_STATIC_CONTEXT_SIZE = 0x4808;

// Return value (via buffer ptr) for HSA_GET_SIZES
struct HsaDriverSizes
{
    uint32_t num_kernels;
    uint32_t string_table_size;
    uint32_t code_size;
    uint32_t readonly_size;
};

// HSA_GET_KINFO returns an array of num_kernels of these structs
struct HsaKernelInfo
{
    // byte offset into string table
    uint32_t name_offs;
    // byte offset into code array
    uint32_t code_offs;
    // static group (LDS) segment size, in bytes
    uint32_t static_lds_size;
    // private segment size, in bytes
    uint32_t private_mem_size;
    // spill segment size, in bytes
    uint32_t spill_mem_size;
    // Number of s registers
    uint32_t sRegCount;
    // Number of d registers
    uint32_t dRegCount;
    // Number of c registers
    uint32_t cRegCount;
};
#endif // __HSA_KERNEL_INFO_HH__

View File

@@ -1,77 +0,0 @@
/*
* Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#include "gpu-compute/hsa_object.hh"
#include <cassert>
#include <fstream>
#include "base/logging.hh"
// Base constructor: record the object's file name and start with no
// readonly data.
HsaObject::HsaObject(const std::string &fname)
    : readonlyData(nullptr), filename(fname)
{
}
// Factory: slurp the whole file into memory, then offer it to each
// registered loader (tryFileFuncs) in turn; the first loader that
// recognizes the format produces the object. Fatals if no loader
// accepts the file.
HsaObject*
HsaObject::createHsaObject(const std::string &fname)
{
    HsaObject *hsaObj = nullptr;
    uint8_t *file_data = nullptr;
    int file_length = 0;

    // open at end-of-file so tellg() below yields the file size
    std::ifstream code_file(fname, std::ifstream::ate | std::ifstream::in |
                            std::ifstream::binary);

    // NOTE(review): asserts compile out under NDEBUG, leaving open
    // failures undiagnosed -- a fatal() check would be sturdier.
    assert(code_file.is_open());
    assert(code_file.good());

    file_length = code_file.tellg();
    code_file.seekg(0, code_file.beg);

    file_data = new uint8_t[file_length];
    code_file.read((char*)file_data, file_length);
    code_file.close();

    for (const auto &tryFile : tryFileFuncs) {
        if ((hsaObj = tryFile(fname, file_length, file_data))) {
            // presumably the successful loader takes ownership of
            // file_data (it is not freed here) -- TODO confirm
            return hsaObj;
        }
    }

    delete[] file_data;
    fatal("Unknown HSA object type for file: %s.\n", fname);

    // unreachable: fatal() does not return
    return nullptr;
}

View File

@@ -1,74 +0,0 @@
/*
* Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#ifndef __HSA_OBJECT_HH__
#define __HSA_OBJECT_HH__
#include <functional>
#include <string>
#include <vector>
class HsaCode;
/* @class HsaObject
* base loader object for HSA kernels. this class provides
* the base method definitions for loading, storing, and
* accessing HSA kernel objects into the simulator.
*/
class HsaObject
{
  public:
    HsaObject(const std::string &fileName);

    // This class is used polymorphically (it declares pure virtual
    // methods), so it needs a virtual destructor: without one, deleting
    // a derived loader through an HsaObject* is undefined behavior.
    virtual ~HsaObject() { }

    // read the file and dispatch to the first loader that accepts it
    static HsaObject* createHsaObject(const std::string &fname);
    // registered format probes: (filename, length, data) -> object or null
    static std::vector<std::function<HsaObject*(const std::string&, int,
                                                uint8_t*)>> tryFileFuncs;

    virtual HsaCode* getKernel(const std::string &name) const = 0;
    virtual HsaCode* getKernel(int i) const = 0;
    virtual HsaCode* getFunction(const std::string &name) const = 0;
    virtual int numKernels() const = 0;

    const std::string& name() const { return filename; }

    uint8_t *readonlyData;

  protected:
    const std::string filename;
};
#endif // __HSA_OBJECT_HH__

View File

@@ -1,460 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#include "gpu-compute/hsail_code.hh"

#include <algorithm>

#include "arch/gpu_types.hh"
#include "arch/hsail/Brig.h"
#include "arch/hsail/operand.hh"
#include "config/the_gpu_isa.hh"
#include "debug/BRIG.hh"
#include "debug/HSAILObject.hh"
#include "gpu-compute/brig_object.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/kernel_cfg.hh"
using namespace Brig;
int getBrigDataTypeBytes(BrigType16_t t);
// Placeholder constructor: used when a kernel-related directive is seen
// before the kernel itself; init() fills in the rest later.
HsailCode::HsailCode(const std::string &name_str)
    : HsaCode(name_str), private_size(-1), readonly_size(-1)
{
}
// Walk the BRIG code section for one kernel/function: decode each
// instruction, record labels and variable symbols as their directives
// are reached, track per-scope argument sizes, and finally build the
// CFG and record the maximum register indices seen.
// (Also fixes the "dircetive" typo in the kind_end debug message.)
void
HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
                StorageMap *objStorageMap)
{
    storageMap = objStorageMap;

    // set pointer so that decoding process can find this kernel context when
    // needed
    obj->currentCode = this;

    if (code_dir->base.kind != BRIG_KIND_DIRECTIVE_FUNCTION &&
        code_dir->base.kind != BRIG_KIND_DIRECTIVE_KERNEL) {
        fatal("unexpected directive kind %d inside kernel/function init\n",
              code_dir->base.kind);
    }

    DPRINTF(HSAILObject, "Initializing code, first code block entry is: %d\n",
            code_dir->firstCodeBlockEntry);

    // clear these static vars so we can properly track the max index
    // for this kernel
    SRegOperand::maxRegIdx = 0;
    DRegOperand::maxRegIdx = 0;
    CRegOperand::maxRegIdx = 0;
    setPrivateSize(0);

    const BrigBase *entryPtr = brigNext((BrigBase*)code_dir);
    const BrigBase *endPtr =
        obj->getCodeSectionEntry(code_dir->nextModuleEntry);

    // the instruction's byte address (relative to the base addr
    // of the code section)
    int inst_addr = 0;
    // the index that points to the instruction in the instruction
    // array
    int inst_idx = 0;
    std::vector<GPUStaticInst*> instructions;
    int funcarg_size_scope = 0;

    // walk through instructions in code section and directives in
    // directive section in parallel, processing directives that apply
    // when we reach the relevant code point.
    while (entryPtr < endPtr) {
        switch (entryPtr->kind) {
          case BRIG_KIND_DIRECTIVE_VARIABLE:
            {
                const BrigDirectiveVariable *sym =
                    (const BrigDirectiveVariable*)entryPtr;
                DPRINTF(HSAILObject,"Initializing code, directive is "
                        "kind_variable, symbol is: %s\n",
                        obj->getString(sym->name));

                StorageElement *se = storageMap->addSymbol(sym, obj);

                if (sym->segment == BRIG_SEGMENT_PRIVATE) {
                    setPrivateSize(se->size);
                } else { // spill
                    funcarg_size_scope += se->size;
                }
            }
            break;

          case BRIG_KIND_DIRECTIVE_LABEL:
            {
                const BrigDirectiveLabel *lbl =
                    (const BrigDirectiveLabel*)entryPtr;
                DPRINTF(HSAILObject,"Initializing code, directive is "
                        "kind_label, label is: %s \n",
                        obj->getString(lbl->name));

                // labels point at the NEXT instruction's byte address
                labelMap.addLabel(lbl, inst_addr, obj);
            }
            break;

          case BRIG_KIND_DIRECTIVE_PRAGMA:
            {
                DPRINTF(HSAILObject, "Initializing code, directive "
                        "is kind_pragma\n");
            }
            break;

          case BRIG_KIND_DIRECTIVE_COMMENT:
            {
                DPRINTF(HSAILObject, "Initializing code, directive is "
                        "kind_comment\n");
            }
            break;

          case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
            {
                DPRINTF(HSAILObject, "Initializing code, directive is "
                        "kind_arg_block_start\n");

                // each arg block re-lays-out the ARG segment from 0
                storageMap->resetOffset(BRIG_SEGMENT_ARG);
                funcarg_size_scope = 0;
            }
            break;

          case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
            {
                DPRINTF(HSAILObject, "Initializing code, directive is "
                        "kind_arg_block_end\n");

                // keep the largest arg-block footprint seen so far
                funcarg_size = funcarg_size < funcarg_size_scope ?
                    funcarg_size_scope : funcarg_size;
            }
            break;

          case BRIG_KIND_DIRECTIVE_END:
            DPRINTF(HSAILObject, "Initializing code, directive is "
                    "kind_end\n");
            break;

          default:
            if (entryPtr->kind >= BRIG_KIND_INST_BEGIN &&
                entryPtr->kind <= BRIG_KIND_INST_END) {

                BrigInstBase *instPtr = (BrigInstBase*)entryPtr;
                TheGpuISA::MachInst machInst = { instPtr, obj };
                // decoder setInstSeqNum/saveInst give the decoded
                // instruction a stable index we store in _insts
                GPUStaticInst *iptr = decoder.decode(machInst);

                if (iptr) {
                    DPRINTF(HSAILObject, "Initializing code, processing inst "
                            "byte addr #%d idx %d: OPCODE=%d\n", inst_addr,
                            inst_idx, instPtr->opcode);

                    TheGpuISA::RawMachInst raw_inst = decoder.saveInst(iptr);
                    iptr->instNum(inst_idx);
                    iptr->instAddr(inst_addr);
                    _insts.push_back(raw_inst);
                    instructions.push_back(iptr);
                }
                inst_addr += sizeof(TheGpuISA::RawMachInst);
                ++inst_idx;
            } else if (entryPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
                       entryPtr->kind < BRIG_KIND_OPERAND_END) {
                warn("unexpected operand entry in code segment\n");
            } else {
                // there are surely some more cases we will need to handle,
                // but we'll deal with them as we find them.
                fatal("unexpected directive kind %d inside kernel scope\n",
                      entryPtr->kind);
            }
        }

        entryPtr = brigNext(entryPtr);
    }

    // compute Control Flow Graph for current kernel
    ControlFlowInfo::assignImmediatePostDominators(instructions);

    max_sreg = SRegOperand::maxRegIdx;
    max_dreg = DRegOperand::maxRegIdx;
    max_creg = CRegOperand::maxRegIdx;

    obj->currentCode = nullptr;
}
// Full constructor: immediately parses the kernel/function body via
// init().
HsailCode::HsailCode(const std::string &name_str,
                     const BrigDirectiveExecutable *code_dir,
                     const BrigObject *obj, StorageMap *objStorageMap)
    : HsaCode(name_str), private_size(-1), readonly_size(-1)
{
    init(code_dir, obj, objStorageMap);
}
// Define a label at the given instruction byte address. A label may
// already exist as a forward reference, but defining it twice is fatal.
void
LabelMap::addLabel(const Brig::BrigDirectiveLabel *lblDir, int inst_index,
                   const BrigObject *obj)
{
    std::string labelName = obj->getString(lblDir->name);
    // operator[] creates an undefined placeholder entry if absent
    Label &entry = map[labelName];

    if (entry.defined()) {
        fatal("Attempt to redefine existing label %s\n", labelName);
    }

    entry.define(labelName, inst_index);
    DPRINTF(HSAILObject, "label %s = %d\n", labelName, inst_index);
}
// Look up (or create a forward-reference placeholder for) a label and
// return a stable pointer to its map entry.
Label*
LabelMap::refLabel(const Brig::BrigDirectiveLabel *lblDir,
                   const BrigObject *obj)
{
    std::string labelName = obj->getString(lblDir->name);
    Label &entry = map[labelName];
    // record the name on first reference, or verify it on later ones
    entry.checkName(labelName);
    return &entry;
}
// Return the size in bytes of a BRIG scalar data type. Single-bit B1
// values have no byte size here and fall through to the fatal() below.
int
getBrigDataTypeBytes(BrigType16_t t)
{
    switch (t) {
      case BRIG_TYPE_S8:
      case BRIG_TYPE_U8:
      case BRIG_TYPE_B8:
        return 1;

      case BRIG_TYPE_S16:
      case BRIG_TYPE_U16:
      case BRIG_TYPE_B16:
      case BRIG_TYPE_F16:
        return 2;

      case BRIG_TYPE_S32:
      case BRIG_TYPE_U32:
      case BRIG_TYPE_B32:
      case BRIG_TYPE_F32:
        return 4;

      case BRIG_TYPE_S64:
      case BRIG_TYPE_U64:
      case BRIG_TYPE_B64:
      case BRIG_TYPE_F64:
        return 8;

      case BRIG_TYPE_B1:
      default:
        fatal("unhandled symbol data type %d", t);
        return 0;
    }
}
// Allocate storage for a BRIG variable in this segment: compute its
// size, align the running offset to the element size, and register the
// new element in all three lookup indices (list, address map, and
// directive-pointer map).
StorageElement*
StorageSpace::addSymbol(const BrigDirectiveVariable *sym,
                        const BrigObject *obj)
{
    const char *sym_name = obj->getString(sym->name);
    uint64_t size = 0;
    uint64_t offset = 0;

    if (sym->type & BRIG_TYPE_ARRAY) {
        // arrays: element size times element count; dim is a 64-bit
        // count split into hi/lo halves
        size = getBrigDataTypeBytes(sym->type & ~BRIG_TYPE_ARRAY);
        size *= (((uint64_t)sym->dim.hi) << 32 | (uint64_t)sym->dim.lo);
        // align to the element size, not the whole array size
        offset = roundUp(nextOffset, getBrigDataTypeBytes(sym->type &
                         ~BRIG_TYPE_ARRAY));
    } else {
        size = getBrigDataTypeBytes(sym->type);
        offset = roundUp(nextOffset, getBrigDataTypeBytes(sym->type));
    }

    nextOffset = offset + size;

    DPRINTF(HSAILObject, "Adding SYMBOL %s size %d offset %#x, init: %d\n",
            sym_name, size, offset, sym->init);

    StorageElement* se = new StorageElement(sym_name, offset, size, sym);
    elements.push_back(se);
    elements_by_addr.insert(AddrRange(offset, offset + size - 1), se);
    elements_by_brigptr[sym] = se;

    return se;
}
// Find a symbol in this segment by name, or nullptr if absent.
// Linear scan via std::find_if; per-segment symbol counts are small.
StorageElement*
StorageSpace::findSymbol(std::string name)
{
    auto it = std::find_if(elements.begin(), elements.end(),
                           [&name](const StorageElement *se) {
                               return se->name == name;
                           });

    return it == elements.end() ? nullptr : *it;
}
// Find the symbol whose allocated address range covers addr, or
// nullptr if no element in this segment contains it.
StorageElement*
StorageSpace::findSymbol(uint64_t addr)
{
    assert(elements_by_addr.size() > 0);

    // contains() returns end() when no range covers addr
    auto it = elements_by_addr.contains(addr);

    return it == elements_by_addr.end() ? nullptr : it->second;
}
// Find the symbol created for a particular BRIG variable directive, or
// nullptr if this segment has no element for it.
StorageElement*
StorageSpace::findSymbol(const BrigDirectiveVariable *brigptr)
{
    assert(elements_by_brigptr.size() > 0);

    auto it = elements_by_brigptr.find(brigptr);

    return it == elements_by_brigptr.end() ? nullptr : it->second;
}
// Build one StorageSpace per BRIG segment. outerScope (may be null)
// links this map to an enclosing scope that lookups fall back to.
StorageMap::StorageMap(StorageMap *outerScope)
    : outerScopeMap(outerScope)
{
    for (int i = 0; i < NumSegments; ++i)
        space[i] = new StorageSpace((BrigSegment)i);
}
// Route a variable directive to the StorageSpace of its own segment.
StorageElement*
StorageMap::addSymbol(const BrigDirectiveVariable *sym, const BrigObject *obj)
{
    BrigSegment8_t segment = sym->segment;

    assert(segment >= Brig::BRIG_SEGMENT_FLAT);
    assert(segment < NumSegments);

    return space[segment]->addSymbol(sym, obj);
}
// Total size of a segment in this scope. Group and readonly segments
// additionally accumulate the sizes from enclosing scopes; every other
// segment is local to this map.
int
StorageMap::getSize(Brig::BrigSegment segment)
{
    assert(segment > Brig::BRIG_SEGMENT_GLOBAL);
    assert(segment < NumSegments);

    int sz = space[segment]->getSize();

    bool scoped = segment == Brig::BRIG_SEGMENT_GROUP ||
                  segment == Brig::BRIG_SEGMENT_READONLY;

    if (scoped && outerScopeMap) {
        sz += outerScopeMap->getSize(segment);
    }

    return sz;
}
// Rewind a segment's allocation offset to zero; callers rewind the ARG
// segment at argument-block boundaries so each block is laid out fresh.
void
StorageMap::resetOffset(Brig::BrigSegment segment)
{
    space[segment]->resetOffset();
}
// Name lookup: search this scope first, then fall back to the
// enclosing scope's map, if any.
StorageElement*
StorageMap::findSymbol(BrigSegment segment, std::string name)
{
    StorageElement *se = space[segment]->findSymbol(name);

    if (!se && outerScopeMap) {
        se = outerScopeMap->findSymbol(segment, name);
    }

    return se;
}
// Address lookup: search this scope first, then fall back to the
// enclosing scope's map, if any.
StorageElement*
StorageMap::findSymbol(Brig::BrigSegment segment, uint64_t addr)
{
    StorageSpace *sp = space[segment];

    // there is no memory in segment?
    if (!sp) {
        return nullptr;
    }

    StorageElement *se = sp->findSymbol(addr);

    if (!se && outerScopeMap) {
        se = outerScopeMap->findSymbol(segment, addr);
    }

    return se;
}
// Directive-pointer lookup: search this scope first, then fall back to
// the enclosing scope's map, if any.
StorageElement*
StorageMap::findSymbol(Brig::BrigSegment segment,
                       const BrigDirectiveVariable *brigptr)
{
    StorageSpace *sp = space[segment];

    // there is no memory in segment?
    if (!sp) {
        return nullptr;
    }

    StorageElement *se = sp->findSymbol(brigptr);

    if (!se && outerScopeMap) {
        se = outerScopeMap->findSymbol(segment, brigptr);
    }

    return se;
}

View File

@@ -1,445 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#ifndef __HSAIL_CODE_HH__
#define __HSAIL_CODE_HH__
#include <cassert>
#include <list>
#include <map>
#include <string>
#include <vector>
#include "arch/gpu_decoder.hh"
#include "arch/hsail/Brig.h"
#include "base/addr_range_map.hh"
#include "base/intmath.hh"
#include "config/the_gpu_isa.hh"
#include "gpu-compute/hsa_code.hh"
#include "gpu-compute/hsa_kernel_info.hh"
#include "gpu-compute/misc.hh"
class BrigObject;
class GPUStaticInst;
// Count the set bits among the low sz bits of src.
inline int
popcount(uint64_t src, int sz)
{
    int count = 0;
    while (sz-- > 0) {
        count += src & 1;
        src >>= 1;
    }
    return count;
}
// Index of the lowest set bit among the low sz bits of src; returns sz
// when none of those bits are set.
inline int
firstbit(uint64_t src, int sz)
{
    int pos = 0;
    while (pos < sz && !(src & 1)) {
        src >>= 1;
        ++pos;
    }
    return pos;
}
// Index of the highest set bit among the low sz bits of src; returns
// -1 when none of those bits are set.
inline int
lastbit(uint64_t src, int sz)
{
    int last = -1;
    for (int pos = 0; pos < sz; ++pos, src >>= 1) {
        if (src & 1)
            last = pos;
    }
    return last;
}
// For an sz-bit value: if the sign (top) bit is set, return the index
// of the highest CLEAR bit among the remaining low sz-1 bits; otherwise
// return the index of the highest SET bit among them. Returns -1 if no
// such bit exists. Requires 1 <= sz <= 64.
inline int
signbit(uint64_t src, int sz)
{
    int i0 = -1;

    // 64-bit-safe test of the sign bit. The previous expression
    // "src & (1 << (sz - 1))" shifted a 32-bit int, which is undefined
    // behavior (and produced the wrong mask) for sz > 31.
    if ((src >> (sz - 1)) & 1) {
        for (int i = 0; i < sz - 1; ++i) {
            if (!(src & 1))
                i0 = i;
            src >>= 1;
        }
    } else {
        for (int i = 0; i < sz - 1; ++i) {
            if (src & 1)
                i0 = i;
            src >>= 1;
        }
    }

    return i0;
}
// Reverse the low sz bits of src (bit 0 becomes bit sz-1, etc.); bits
// above sz are dropped.
inline uint64_t
bitrev(uint64_t src, int sz)
{
    uint64_t result = 0;
    for (int i = 0; i < sz; ++i) {
        result = (result << 1) | (src & 1);
        src >>= 1;
    }
    return result;
}
// Upper 32 bits of the 64-bit product of two unsigned 32-bit values.
inline uint64_t
mul_hi(uint32_t a, uint32_t b)
{
    uint64_t product = (uint64_t)a * (uint64_t)b;
    return product >> 32;
}
// Upper 32 bits (arithmetically shifted) of the 64-bit product of two
// signed 32-bit values, returned reinterpreted as uint64_t.
inline uint64_t
mul_hi(int32_t a, int32_t b)
{
    int64_t product = (int64_t)a * (int64_t)b;
    return product >> 32;
}
inline uint64_t
mul_hi(uint64_t a, uint64_t b)
{
    // NOTE(review): this returns bits [63:32] of the *truncated* 64-bit
    // product, not the upper 64 bits of the full 128-bit product a true
    // 64-bit mulhi would produce -- confirm against the HSAIL spec
    // before reusing.
    return ((uint64_t)a * (uint64_t)b) >> 32;
}
inline uint64_t
mul_hi(int64_t a, int64_t b)
{
    // NOTE(review): as with the unsigned 64-bit overload, this returns
    // bits [63:32] of the truncated 64-bit product (sign-extended by the
    // arithmetic shift), not a true 64-bit mulhi -- confirm intent.
    return ((int64_t)a * (int64_t)b) >> 32;
}
inline uint64_t
mul_hi(double a, double b)
{
    // NOTE(review): placeholder -- a "high half" is meaningless for
    // floating point, so this overload always returns 0.
    return 0;
}
// An HSAIL label: a name bound to an instruction address. Labels may be
// referenced before they are defined; value == -1 marks "not yet
// defined".
class Label
{
  public:
    std::string name;
    int value;

    Label() : value(-1)
    {
    }

    bool defined() { return value != -1; }

    // Record the label's name on first use; later uses must match.
    void
    checkName(std::string &_name)
    {
        if (name.empty()) {
            name = _name;
        } else {
            assert(name == _name);
        }
    }

    // Bind the label to an address; redefinition is a programmer error.
    void
    define(std::string &_name, int _value)
    {
        assert(!defined());
        assert(_value != -1);
        value = _value;
        checkName(_name);
    }

    // Fetch the bound address; the label must already be defined.
    int
    get()
    {
        assert(defined());
        return value;
    }
};
// Maps label names to Label records; forward references create
// placeholder entries that are filled in when the label is defined.
class LabelMap
{
    std::map<std::string, Label> map;

  public:
    LabelMap() { }

    // define a label at inst_index; fatals on redefinition
    void addLabel(const Brig::BrigDirectiveLabel *lbl, int inst_index,
                  const BrigObject *obj);
    // reference (and create, if needed) a label by its BRIG directive
    Label *refLabel(const Brig::BrigDirectiveLabel *lbl,
                    const BrigObject *obj);
};
// Number of BRIG memory segments tracked per map, derived from the
// highest segment enumerator (BRIG_SEGMENT_AMD_GCN).
const int NumSegments = Brig::BRIG_SEGMENT_AMD_GCN;

// human-readable segment names, indexed by segment number
extern const char *segmentNames[];
// One named variable within a storage segment: its byte offset and
// size, plus the BRIG directive it was created from.
class StorageElement
{
  public:
    // symbol name from the BRIG string table
    std::string name;
    // byte offset within the owning segment
    uint64_t offset;
    // allocated size in bytes
    uint64_t size;
    const Brig::BrigDirectiveVariable *brigSymbol;

    StorageElement(const char *_name, uint64_t _offset, int _size,
                   const Brig::BrigDirectiveVariable *sym)
        : name(_name), offset(_offset), size(_size), brigSymbol(sym)
    {
    }
};
// Storage allocator and lookup table for a single BRIG memory segment.
// Symbols are appended at increasing (aligned) offsets and indexed
// three ways: by name, by address range, and by BRIG directive pointer.
class StorageSpace
{
    typedef std::map<const Brig::BrigDirectiveVariable*, StorageElement*>
        DirVarToSE_map;

    std::list<StorageElement*> elements;
    AddrRangeMap<StorageElement*> elements_by_addr;
    DirVarToSE_map elements_by_brigptr;

    // next free byte offset in this segment
    uint64_t nextOffset;

  public:
    StorageSpace(Brig::BrigSegment _class) : nextOffset(0)
    {
    }

    StorageElement *addSymbol(const Brig::BrigDirectiveVariable *sym,
                              const BrigObject *obj);

    StorageElement* findSymbol(std::string name);
    StorageElement* findSymbol(uint64_t addr);
    StorageElement* findSymbol(const Brig::BrigDirectiveVariable *brigptr);

    // total bytes allocated so far in this segment
    int getSize() { return nextOffset; }
    // rewind allocation (used for per-scope ARG layout)
    void resetOffset() { nextOffset = 0; }
};
// Per-scope symbol table: one StorageSpace per BRIG segment, with an
// optional link to an enclosing scope's map that lookups and (for
// group/readonly) size queries fall back to.
class StorageMap
{
    StorageMap *outerScopeMap;
    StorageSpace *space[NumSegments];

  public:
    StorageMap(StorageMap *outerScope = nullptr);

    // allocate storage for sym in its own segment
    StorageElement *addSymbol(const Brig::BrigDirectiveVariable *sym,
                              const BrigObject *obj);

    StorageElement* findSymbol(Brig::BrigSegment segment, std::string name);
    StorageElement* findSymbol(Brig::BrigSegment segment, uint64_t addr);

    StorageElement* findSymbol(Brig::BrigSegment segment,
                               const Brig::BrigDirectiveVariable *brigptr);

    // overloaded version to avoid casting
    StorageElement*
    findSymbol(Brig::BrigSegment8_t segment, std::string name)
    {
        return findSymbol((Brig::BrigSegment)segment, name);
    }

    int getSize(Brig::BrigSegment segment);
    void resetOffset(Brig::BrigSegment segment);
};
// HSAIL base data types: bit (B), unsigned (U), signed (S), and float
// (F) at 8-64 bit widths, plus default/null markers.
typedef enum
{
    BT_DEFAULT,
    BT_B8,
    BT_U8,
    BT_U16,
    BT_U32,
    BT_U64,
    BT_S8,
    BT_S16,
    BT_S32,
    BT_S64,
    BT_F16,
    BT_F32,
    BT_F64,
    BT_NULL
} base_type_e;
/* @class HsailCode
* the HsailCode class is used to store information
* about HSA kernels stored in the BRIG format. it holds
* all information about a kernel, function, or variable
* symbol and provides methods for accessing that
* information.
*/
class HsailCode final : public HsaCode
{
public:
// decoder used to translate the BRIG instruction stream
TheGpuISA::Decoder decoder;
// storage for variable symbols visible to this kernel
StorageMap *storageMap;
// labels referenced by this kernel's code
LabelMap labelMap;
// kernarg segment bounds -- presumably byte offsets set when the
// code object is loaded; TODO confirm against init()
uint32_t kernarg_start;
uint32_t kernarg_end;
// private/readonly segment sizes; see getSize(Brig::BrigSegment)
// and setPrivateSize() below
int32_t private_size;
int32_t readonly_size;
// We track the maximum register index used for each register
// class when we load the code so we can size the register files
// appropriately (i.e., one more than the max index).
uint32_t max_creg; // maximum c-register index
uint32_t max_sreg; // maximum s-register index
uint32_t max_dreg; // maximum d-register index
HsailCode(const std::string &name_str,
const Brig::BrigDirectiveExecutable *code_dir,
const BrigObject *obj,
StorageMap *objStorageMap);
// this version is used to create a placeholder when
// we encounter a kernel-related directive before the
// kernel itself
HsailCode(const std::string &name_str);
void init(const Brig::BrigDirectiveExecutable *code_dir,
const BrigObject *obj, StorageMap *objStorageMap);
// fill in the register counts and segment sizes the dispatcher
// needs in order to launch this kernel
void
generateHsaKernelInfo(HsaKernelInfo *hsaKernelInfo) const
{
// register files must hold one more register than the max index
hsaKernelInfo->sRegCount = max_sreg + 1;
hsaKernelInfo->dRegCount = max_dreg + 1;
hsaKernelInfo->cRegCount = max_creg + 1;
hsaKernelInfo->static_lds_size = getSize(Brig::BRIG_SEGMENT_GROUP);
// private and spill sizes are rounded up to 8-byte multiples
hsaKernelInfo->private_mem_size =
roundUp(getSize(Brig::BRIG_SEGMENT_PRIVATE), 8);
hsaKernelInfo->spill_mem_size =
roundUp(getSize(Brig::BRIG_SEGMENT_SPILL), 8);
}
// map the generic MemorySegment enum onto the equivalent BRIG
// segment and return that segment's size in bytes
int
getSize(MemorySegment segment) const
{
Brig::BrigSegment brigSeg;
switch (segment) {
case MemorySegment::NONE:
brigSeg = Brig::BRIG_SEGMENT_NONE;
break;
case MemorySegment::FLAT:
brigSeg = Brig::BRIG_SEGMENT_FLAT;
break;
case MemorySegment::GLOBAL:
brigSeg = Brig::BRIG_SEGMENT_GLOBAL;
break;
case MemorySegment::READONLY:
brigSeg = Brig::BRIG_SEGMENT_READONLY;
break;
case MemorySegment::KERNARG:
brigSeg = Brig::BRIG_SEGMENT_KERNARG;
break;
case MemorySegment::GROUP:
brigSeg = Brig::BRIG_SEGMENT_GROUP;
break;
case MemorySegment::PRIVATE:
brigSeg = Brig::BRIG_SEGMENT_PRIVATE;
break;
case MemorySegment::SPILL:
brigSeg = Brig::BRIG_SEGMENT_SPILL;
break;
case MemorySegment::ARG:
brigSeg = Brig::BRIG_SEGMENT_ARG;
break;
case MemorySegment::EXTSPACE0:
brigSeg = Brig::BRIG_SEGMENT_AMD_GCN;
break;
default:
fatal("Unknown BrigSegment type.\n");
}
return getSize(brigSeg);
}
private:
// size in bytes of a BRIG segment; the private segment is tracked
// separately from the storage map (see setPrivateSize)
int
getSize(Brig::BrigSegment segment) const
{
if (segment == Brig::BRIG_SEGMENT_PRIVATE) {
// with the code generated by new HSA compiler the assertion
// does not hold anymore..
//assert(private_size != -1);
return private_size;
} else {
return storageMap->getSize(segment);
}
}
public:
StorageElement*
findSymbol(Brig::BrigSegment segment, uint64_t addr)
{
return storageMap->findSymbol(segment, addr);
}
void
setPrivateSize(int32_t _private_size)
{
private_size = _private_size;
}
// look up (creating if necessary) the Label for a BRIG label directive
Label*
refLabel(const Brig::BrigDirectiveLabel *lbl, const BrigObject *obj)
{
return labelMap.refLabel(lbl, obj);
}
};
#endif // __HSAIL_CODE_HH__

View File

@@ -1,295 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#include "gpu-compute/kernel_cfg.hh"
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include "gpu-compute/gpu_static_inst.hh"
/**
 * Build the control-flow graph for a kernel's instruction stream and
 * annotate each instruction ending a basic block with its immediate
 * post-dominator (the reconvergence point of diverged control flow).
 */
void
ControlFlowInfo::assignImmediatePostDominators(
    const std::vector<GPUStaticInst*>& instructions)
{
    ControlFlowInfo control_flow(instructions);
    control_flow.findImmediatePostDominators();
}
// Constructing the object partitions the instruction stream into basic
// blocks and wires up their successor edges; post-dominator analysis
// is run separately (see findImmediatePostDominators).
ControlFlowInfo::ControlFlowInfo(const std::vector<GPUStaticInst*>& insts) :
instructions(insts)
{
createBasicBlocks();
connectBasicBlocks();
}
/**
 * Return the basic block whose address range contains inst_addr, or
 * nullptr if the address falls inside no block.
 */
BasicBlock*
ControlFlowInfo::basicBlock(int inst_addr) const {
    for (auto& block: basicBlocks) {
        if (block->isExit()) {
            // The synthetic exit block holds no instructions and its
            // firstInstruction is nullptr (see createBasicBlocks);
            // skip it rather than dereference the null pointer, which
            // the old code did whenever inst_addr matched no block.
            continue;
        }
        int first_block_addr = block->firstInstruction->instAddr();
        if (inst_addr >= first_block_addr && inst_addr <
            first_block_addr + block->size * sizeof(TheGpuISA::RawMachInst)) {
            return block.get();
        }
    }
    return nullptr;
}
/**
 * Return the last instruction of a basic block, or nullptr for the
 * synthetic exit block (which contains no instructions).
 */
GPUStaticInst*
ControlFlowInfo::lastInstruction(const BasicBlock* block) const
{
    if (block->isExit()) {
        return nullptr;
    }
    const size_t last_idx =
        block->firstInstruction->instNum() + block->size - 1;
    return instructions.at(last_idx);
}
/**
 * Return the block that immediately post-dominates the given block,
 * or nullptr for the exit block.
 */
BasicBlock*
ControlFlowInfo::postDominator(const BasicBlock* block) const
{
    if (block->isExit()) {
        return nullptr;
    }
    GPUStaticInst *terminator = lastInstruction(block);
    return basicBlock(terminator->ipdInstNum());
}
// Partition the instruction stream into basic blocks using the classic
// leader algorithm, then append a synthetic, empty exit block.
void
ControlFlowInfo::createBasicBlocks()
{
assert(!instructions.empty());
std::set<int> leaders;
// first instruction is a leader
leaders.insert(0);
// every branch target, and every instruction following a branch,
// starts a new block
for (const auto &instruction : instructions) {
if (instruction->isBranch()) {
const int target_pc = instruction->getTargetPc();
leaders.insert(target_pc);
leaders.insert(instruction->nextInstAddr());
}
}
// second pass: cut the stream at each leader; a block's size (in
// instructions) is only known once the next leader is reached, so
// the previous block is closed when a new one opens
size_t block_size = 0;
for (const auto &instruction : instructions) {
if (leaders.find(instruction->instAddr()) != leaders.end()) {
uint32_t id = basicBlocks.size();
if (id > 0) {
basicBlocks.back()->size = block_size;
}
block_size = 0;
basicBlocks.emplace_back(new BasicBlock(id, instruction));
}
block_size++;
}
basicBlocks.back()->size = block_size;
// exit basic block: synthetic, with no instructions
// (firstInstruction == nullptr, size == 0)
basicBlocks.emplace_back(new BasicBlock(basicBlocks.size(), nullptr));
}
// Add successor edges between blocks: branch targets, fall-through to
// the next block, and an edge to the synthetic exit block after a
// return.
void
ControlFlowInfo::connectBasicBlocks()
{
BasicBlock* exit_bb = basicBlocks.back().get();
for (auto& bb : basicBlocks) {
// the exit block is last and has no successors
if (bb->isExit()) {
break;
}
GPUStaticInst* last = lastInstruction(bb.get());
if (last->isReturn()) {
bb->successorIds.insert(exit_bb->id);
continue;
}
if (last->isBranch()) {
const uint32_t target_pc = last->getTargetPc();
BasicBlock* target_bb = basicBlock(target_pc);
bb->successorIds.insert(target_bb->id);
}
// Unconditional jump instructions have a unique successor
if (!last->isUnconditionalJump()) {
BasicBlock* next_bb = basicBlock(last->nextInstAddr());
bb->successorIds.insert(next_bb->id);
}
}
}
// In-place set intersection: remove from a every element not present
// in b, leaving a == a ∩ b.
static void
intersect(std::set<uint32_t>& a, const std::set<uint32_t>& b)
{
    for (auto it = a.begin(); it != a.end(); ) {
        if (b.count(*it)) {
            ++it;
        } else {
            it = a.erase(it);
        }
    }
}
// Iterative dataflow computation of post-dominator sets:
// PD(exit) = {exit}; PD(b) = {b} union intersection(PD(s)) over all
// successors s. Non-exit blocks start at the full block set and are
// refined until a fixpoint is reached.
void
ControlFlowInfo::findPostDominators()
{
// the only postdominator of the exit block is itself
basicBlocks.back()->postDominatorIds.insert(basicBlocks.back()->id);
//copy all basic blocks to all postdominator lists except for exit block
for (auto& block : basicBlocks) {
if (!block->isExit()) {
for (uint32_t i = 0; i < basicBlocks.size(); i++) {
block->postDominatorIds.insert(i);
}
}
}
bool change = true;
while (change) {
change = false;
// walk blocks in reverse (skipping the exit block, which is
// last) so information propagates from exit toward entry
for (int h = basicBlocks.size() - 2; h >= 0; --h) {
size_t num_postdominators =
basicBlocks[h]->postDominatorIds.size();
for (int s : basicBlocks[h]->successorIds) {
intersect(basicBlocks[h]->postDominatorIds,
basicBlocks[s]->postDominatorIds);
}
basicBlocks[h]->postDominatorIds.insert(h);
// keep iterating as long as any block's set still shrinks
change |= (num_postdominators
!= basicBlocks[h]->postDominatorIds.size());
}
}
}
// In-place set difference: remove from a every element of b, except
// that the given exception value is always retained in a.
static void
setDifference(std::set<uint32_t>&a,
              const std::set<uint32_t>& b, uint32_t exception)
{
    for (const uint32_t elem : b) {
        if (elem == exception) {
            continue;
        }
        a.erase(elem);
    }
}
// The immediate post-dominator of a block is the unique strict
// post-dominator that post-dominates no other strict post-dominator
// of the block. Each block's terminating instruction is annotated
// with the address at which diverged control flow reconverges.
void
ControlFlowInfo::findImmediatePostDominators()
{
assert(basicBlocks.size() > 1); // Entry and exit blocks must be present
findPostDominators();
for (auto& basicBlock : basicBlocks) {
if (basicBlock->isExit()) {
continue;
}
// start from all strict post-dominators of this block...
std::set<uint32_t> candidates = basicBlock->postDominatorIds;
candidates.erase(basicBlock->id);
// ...and remove every candidate that post-dominates another
// candidate; exactly one (the immediate one) must remain
for (uint32_t postDominatorId : basicBlock->postDominatorIds) {
if (postDominatorId != basicBlock->id) {
setDifference(candidates,
basicBlocks[postDominatorId]->postDominatorIds,
postDominatorId);
}
}
assert(candidates.size() == 1);
GPUStaticInst* last_instruction = lastInstruction(basicBlock.get());
BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get();
if (!ipd_block->isExit()) {
GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction;
last_instruction->ipdInstNum(ipd_first_inst->instAddr());
} else {
// reconvergence at the exit block: use the address just past
// this block's last instruction
last_instruction->ipdInstNum(last_instruction->nextInstAddr());
}
}
}
/**
 * Debug dump: print each block's full post-dominator set as
 * "PD(id) = {a, b, ...}".
 */
void
ControlFlowInfo::printPostDominators() const
{
    for (const auto &bb : basicBlocks) {
        std::cout << "PD(" << bb->id << ") = {";
        for (uint32_t pd_id : bb->postDominatorIds) {
            std::cout << pd_id << ", ";
        }
        std::cout << "}" << std::endl;
    }
}
/**
 * Debug dump: print each non-exit block's immediate post-dominator as
 * "IPD(id) = pd_id, " on a single line.
 */
void
ControlFlowInfo::printImmediatePostDominators() const
{
    for (const auto& bb : basicBlocks) {
        if (bb->isExit()) {
            continue;
        }
        std::cout << "IPD(" << bb->id << ") = "
                  << postDominator(bb.get())->id << ", ";
    }
    std::cout << std::endl;
}
void
ControlFlowInfo::printBasicBlocks() const
{
for (GPUStaticInst* inst : instructions) {
int inst_addr = inst->instAddr();
std::cout << inst_addr << " [" << basicBlock(inst_addr)->id
<< "]: " << inst->disassemble();
if (inst->isBranch()) {
std::cout << ", PC = " << inst->getTargetPc();
}
std::cout << std::endl;
}
}
void
ControlFlowInfo::printBasicBlockDot() const
{
printf("digraph {\n");
for (const auto& basic_block : basicBlocks) {
printf("\t");
for (uint32_t successorId : basic_block->successorIds) {
printf("%d -> %d; ", basic_block->id, successorId);
}
printf("\n");
}
printf("}\n");
}

View File

@@ -1,133 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#ifndef __KERNEL_CFG_HH__
#define __KERNEL_CFG_HH__
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <vector>
class GPUStaticInst;
class HsailCode;
// A maximal straight-line run of instructions with a single entry and
// a single exit. The CFG also contains one synthetic exit block with
// no instructions (begin == nullptr).
struct BasicBlock
{
BasicBlock(uint32_t num, GPUStaticInst* begin) :
id(num), size(0), firstInstruction(begin)
{
}
bool
isEntry() const
{
return !id;
}
// NOTE(review): once the CFG is built, only the synthetic exit
// block keeps size == 0 (see ControlFlowInfo::createBasicBlocks);
// this predicate is only meaningful after construction completes.
bool
isExit() const
{
return !size;
}
/**
 * Unique identifier for the block within a given kernel.
 */
const uint32_t id;
/**
 * Number of instructions contained in the block
 */
size_t size;
/**
 * Pointer to first instruction of the block.
 */
GPUStaticInst* firstInstruction;
/**
 * Identifiers of the blocks that follow (are reachable from) this block.
 */
std::set<uint32_t> successorIds;
/**
 * Identifiers of the blocks that will be visited from this block.
 */
std::set<uint32_t> postDominatorIds;
};
// Builds the control-flow graph of a kernel's instruction stream and
// computes (immediate) post-dominators. Only the static entry point
// below is public; all analysis state is internal.
class ControlFlowInfo
{
public:
/**
 * Compute immediate post-dominator instruction for kernel instructions.
 */
static void assignImmediatePostDominators(
const std::vector<GPUStaticInst*>& instructions);
private:
ControlFlowInfo(const std::vector<GPUStaticInst*>& instructions);
// last instruction of a block (nullptr for the exit block)
GPUStaticInst* lastInstruction(const BasicBlock* block) const;
// block containing the given instruction address, if any
BasicBlock* basicBlock(int inst_addr) const;
// immediate post-dominator block (nullptr for the exit block)
BasicBlock* postDominator(const BasicBlock* block) const;
void createBasicBlocks();
void connectBasicBlocks();
void findPostDominators();
void findImmediatePostDominators();
// debug-dump helpers
void printBasicBlocks() const;
void printBasicBlockDot() const;
void printPostDominators() const;
void printImmediatePostDominators() const;
std::vector<std::unique_ptr<BasicBlock>> basicBlocks;
std::vector<GPUStaticInst*> instructions;
};
#endif // __KERNEL_CFG_HH__

View File

@@ -1,70 +0,0 @@
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#ifndef __NDRANGE_HH__
#define __NDRANGE_HH__
#include "base/types.hh"
#include "gpu-compute/qstruct.hh"
// Per-dispatch bookkeeping for one kernel launch (an HSA N-dimensional
// range): the queue entry that described it, plus workgroup progress
// and completion-notification state.
struct NDRange
{
// copy of the queue entry provided at dispatch
HsaQueueEntry q;
// The current workgroup id (3 dimensions)
int wgId[3];
// The number of workgroups in each dimension
int numWg[3];
// The total number of workgroups
int numWgTotal;
// The number of completed work groups
int numWgCompleted;
// The global workgroup ID
uint32_t globalWgId;
// flag indicating whether all work groups have been launched
bool wg_disp_rem;
// kernel complete
bool execDone;
// true when the host provided addrToNotify/numDispLeft below
// -- NOTE(review): inferred from the name, confirm against users
bool userDoorBellSet;
// host-visible completion flag to set when the kernel finishes
volatile bool *addrToNotify;
// host-visible count of outstanding dispatches to decrement
volatile uint32_t *numDispLeft;
int dispatchId;
int curCid; // Current context id
};
#endif // __NDRANGE_HH__

View File

@@ -1,148 +0,0 @@
/*
* Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Brad Beckmann, Marc Orr
*/
#ifndef __Q_STRUCT_HH__
#define __Q_STRUCT_HH__
#include <bitset>
#include <cstdint>
// Maximum number of arguments
static const int KER_NUM_ARGS = 32;
// Kernel argument buffer size
static const int KER_ARGS_LENGTH = 512;
class LdsChunk;
struct NDRange;
// Be very careful of alignment in this structure. The structure
// must compile to the same layout in both 32-bit and 64-bit mode.
struct HsaQueueEntry
{
// Base pointer for array of instruction pointers
uint64_t code_ptr;
// Grid Size (3 dimensions)
uint32_t gdSize[3];
// Workgroup Size (3 dimensions)
uint32_t wgSize[3];
// register-file sizes the kernel needs (single, double, condition)
uint16_t sRegCount;
uint16_t dRegCount;
uint16_t cRegCount;
// private segment: base address, bytes per work-item, total bytes
uint64_t privMemStart;
uint32_t privMemPerItem;
uint32_t privMemTotal;
// spill segment: base address, bytes per work-item, total bytes
uint64_t spillMemStart;
uint32_t spillMemPerItem;
uint32_t spillMemTotal;
// read-only segment: base address and total bytes
uint64_t roMemStart;
uint32_t roMemTotal;
// Size (in bytes) of LDS
uint32_t ldsSize;
// Virtual Memory Id (unused right now)
uint32_t vmId;
// Pointer to dependency chain (unused now)
uint64_t depends;
// pointer to bool
uint64_t addrToNotify;
// pointer to uint32_t
uint64_t numDispLeft;
// variables to pass arguments when running in standalone mode,
// will be removed when run.py and sh.cpp have been updated to
// use args and offset arrays
uint64_t arg1;
uint64_t arg2;
uint64_t arg3;
uint64_t arg4;
// variables to pass arguments when running in cpu+gpu mode
uint8_t args[KER_ARGS_LENGTH];
uint16_t offsets[KER_NUM_ARGS];
uint16_t num_args;
};
// State that needs to be passed between the simulation and simulated app, a
// pointer to this struct can be passed through the depends field in the
// HsaQueueEntry struct
struct HostState
{
// cl_event* has original HsaQueueEntry for init
uint64_t event;
};
// Total number of HSA queues
static const int HSAQ_NQUEUES = 8;
// These values will eventually live in memory mapped registers
// and be settable by the kernel mode driver.
// Number of entries in each HSA queue
static const int HSAQ_SIZE = 64;
// Address of first HSA queue index
static const int HSAQ_INDX_BASE = 0x10000ll;
// Address of first HSA queue
static const int HSAQ_BASE = 0x11000ll;
// Suggested start of HSA code
static const int HSA_CODE_BASE = 0x18000ll;
// These are shortcuts for deriving the address of a specific
// HSA queue or queue index
// HSAQ(n): base address of queue n; HSAQE(n,i): address of entry i
// of queue n. HSAQ_RI/WI/CI(n) address the three per-queue index
// words (per the ToQueue example below: read, write, and a commit(?)
// index -- NOTE(review): naming inferred from usage, confirm).
#define HSAQ(n) (HSAQ_BASE + HSAQ_SIZE * sizeof(struct fsaQueue) * n)
#define HSAQE(n,i) (HSAQ_BASE + (HSAQ_SIZE * n + i) * sizeof(struct fsaQueue))
#define HSAQ_RI(n) (HSAQ_INDX_BASE + sizeof(int) * (n * 3 + 0))
#define HSAQ_WI(n) (HSAQ_INDX_BASE + sizeof(int) * (n * 3 + 1))
#define HSAQ_CI(n) (HSAQ_INDX_BASE + sizeof(int) * (n * 3 + 2))
/*
* Example code for writing to a queue
*
* void
* ToQueue(int n,struct fsaQueue *val)
* {
* int wi = *(int*)HSAQ_WI(n);
* int ri = *(int*)HSAQ_RI(n);
* int ci = *(int*)HSAQ_CI(n);
*
* if (ci - ri < HSAQ_SIZE) {
* (*(int*)HSAQ_CI(n))++;
* *(HsaQueueEntry*)(HSAQE(n, (wi % HSAQ_SIZE))) = *val;
* (*(int*)HSAQ_WI(n))++;
* }
* }
*/
#endif // __Q_STRUCT_HH__

View File

@@ -1,71 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: John Kalamatianos
*/
#include "gpu-compute/vector_register_state.hh"
#include <limits>
#include "gpu-compute/compute_unit.hh"
/**
 * Default-construct with no parent compute unit; the register files
 * are sized later, in init().
 */
VecRegisterState::VecRegisterState() : computeUnit(nullptr)
{
    // s_reg and d_reg are default-constructed empty vectors; the
    // explicit clear() calls the old code made here were no-ops.
}
/**
 * Attach this register state to its owning compute unit and derive
 * the stats/debug name from the parent's name.
 */
void
VecRegisterState::setParent(ComputeUnit *_computeUnit)
{
    computeUnit = _computeUnit;
    _name = _computeUnit->name() + ".VecRegState";
}
// Size both register files to _size registers of wf_size lanes each,
// zero-initialized. wf_size must be a power of two no larger than the
// bit width of unsigned long long (presumably because a wavefront's
// lane mask is held in one -- TODO confirm against callers).
void
VecRegisterState::init(uint32_t _size, uint32_t wf_size)
{
s_reg.resize(_size);
fatal_if(wf_size > std::numeric_limits<unsigned long long>::digits ||
wf_size <= 0,
"WF size is larger than the host can support or is zero");
fatal_if((wf_size & (wf_size - 1)) != 0,
"Wavefront size should be a power of 2");
for (int i = 0; i < s_reg.size(); ++i) {
s_reg[i].resize(wf_size, 0);
}
d_reg.resize(_size);
for (int i = 0; i < d_reg.size(); ++i) {
d_reg[i].resize(wf_size, 0);
}
}

View File

@@ -1,101 +0,0 @@
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: John Kalamatianos
*/
#ifndef __VECTOR_REGISTER_STATE_HH__
#define __VECTOR_REGISTER_STATE_HH__
#include <array>
#include <cassert>
#include <string>
#include <vector>
#include "gpu-compute/misc.hh"
class ComputeUnit;
// Vector Register State per SIMD unit (contents of the vector
// registers in the VRF of the SIMD)
class VecRegisterState
{
public:
VecRegisterState();
// size both register files; see the .cc file for constraints
void init(uint32_t _size, uint32_t wf_size);
const std::string& name() const { return _name; }
void setParent(ComputeUnit *_computeUnit);
void regStats() { }
// Access methods
// Read lane threadId of register regIdx as T. sizeof(T) selects the
// file: 4-byte T reads s_reg, 8-byte T reads d_reg; the stored bits
// are reinterpreted as T via a pointer cast (type punning).
template<typename T>
T
read(int regIdx, int threadId=0) {
T *p0;
assert(sizeof(T) == 4 || sizeof(T) == 8);
if (sizeof(T) == 4) {
p0 = (T*)(&s_reg[regIdx][threadId]);
} else {
p0 = (T*)(&d_reg[regIdx][threadId]);
}
return *p0;
}
// Write lane threadId of register regIdx; same size-based file
// selection and bit reinterpretation as read().
template<typename T>
void
write(unsigned int regIdx, T value, int threadId=0) {
T *p0;
assert(sizeof(T) == 4 || sizeof(T) == 8);
if (sizeof(T) == 4) {
p0 = (T*)(&s_reg[regIdx][threadId]);
} else {
p0 = (T*)(&d_reg[regIdx][threadId]);
}
*p0 = value;
}
// (Single Precision) Vector Register File size.
int regSize() { return s_reg.size(); }
private:
ComputeUnit *computeUnit;
std::string _name;
// 32-bit Single Precision Vector Register State
std::vector<std::vector<uint32_t>> s_reg;
// 64-bit Double Precision Vector Register State
std::vector<std::vector<uint64_t>> d_reg;
};
#endif // __VECTOR_REGISTER_STATE_HH__

View File

@@ -88,7 +88,7 @@ def _validateTags(commit_header):
# @todo this is error prone, and should be extracted automatically from
# a file
valid_tags = ["arch", "arch-arm", "arch-gcn3", "arch-hsail",
valid_tags = ["arch", "arch-arm", "arch-gcn3",
"arch-mips", "arch-power", "arch-riscv", "arch-sparc", "arch-x86",
"base", "configs", "cpu", "cpu-kvm", "cpu-minor", "cpu-o3",
"cpu-simple", "dev", "dev-arm", "dev-virtio", "ext", "fastmodel",

View File

@@ -49,7 +49,7 @@ add_option('--builds',
'SPARC,' \
'X86,X86_MESI_Two_Level,' \
'RISCV,' \
'HSAIL_X86',
'GCN3_X86',
help="comma-separated build targets to test (default: '%default')")
add_option('--modes',
default='se,fs',