arch, gpu-compute: Remove HSAIL related files
Change-Id: Iefba0a38d62da7598bbfe3fe6ff46454d35144b1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28410 Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com> Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
committed by
Anthony Gutierrez
parent
d2beb1ffc8
commit
9d51dec937
@@ -29,7 +29,6 @@ arch-arm:
|
||||
Andreas Sandberg <andreas.sandberg@arm.com>
|
||||
Giacomo Travaglini <giacomo.travaglini@arm.com>
|
||||
arch-gcn3:
|
||||
arch-hsail:
|
||||
Tony Gutierrez <anthony.gutierrez@amd.com>
|
||||
arch-mips:
|
||||
arch-power:
|
||||
|
||||
@@ -989,7 +989,7 @@ all_gpu_isa_list.sort()
|
||||
|
||||
sticky_vars.AddVariables(
|
||||
EnumVariable('TARGET_ISA', 'Target ISA', 'null', all_isa_list),
|
||||
EnumVariable('TARGET_GPU_ISA', 'Target GPU ISA', 'hsail', all_gpu_isa_list),
|
||||
EnumVariable('TARGET_GPU_ISA', 'Target GPU ISA', 'gcn3', all_gpu_isa_list),
|
||||
ListVariable('CPU_MODELS', 'CPU models',
|
||||
sorted(n for n,m in CpuModel.dict.items() if m.default),
|
||||
sorted(CpuModel.dict.keys())),
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
PROTOCOL = 'GPU_RfO'
|
||||
TARGET_ISA = 'x86'
|
||||
TARGET_GPU_ISA = 'hsail'
|
||||
BUILD_GPU = True
|
||||
CPU_MODELS = 'AtomicSimpleCPU,O3CPU,TimingSimpleCPU'
|
||||
@@ -37,6 +37,9 @@ import sys
|
||||
|
||||
Import('*')
|
||||
|
||||
if not env['BUILD_GPU']:
|
||||
Return()
|
||||
|
||||
if env['TARGET_GPU_ISA'] == 'gcn3':
|
||||
Source('decoder.cc')
|
||||
Source('insts/gpu_static_inst.cc')
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
// University of Illinois/NCSA
|
||||
// Open Source License
|
||||
//
|
||||
// Copyright (c) 2013, Advanced Micro Devices, Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// HSA Team
|
||||
//
|
||||
// Advanced Micro Devices, Inc
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal with
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of the LLVM Team, University of Illinois at
|
||||
// Urbana-Champaign, nor the names of its contributors may be used to
|
||||
// endorse or promote products derived from this Software without specific
|
||||
// prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
||||
// SOFTWARE.
|
||||
#ifndef INTERNAL_BRIG_H
|
||||
#define INTERNAL_BRIG_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace Brig {
|
||||
#include "Brig_new.hpp"
|
||||
|
||||
// These typedefs provide some backward compatibility with earlier versions
|
||||
// of Brig.h, reducing the number of code changes. The distinct names also
|
||||
// increase legibility by showing the code's intent.
|
||||
typedef BrigBase BrigDirective;
|
||||
typedef BrigBase BrigOperand;
|
||||
|
||||
enum BrigMemoryFenceSegments { // for internal use only
|
||||
//.mnemo={ s/^BRIG_MEMORY_FENCE_SEGMENT_//;lc }
|
||||
//.mnemo_token=_EMMemoryFenceSegments
|
||||
//.mnemo_context=EInstModifierInstFenceContext
|
||||
BRIG_MEMORY_FENCE_SEGMENT_GLOBAL = 0,
|
||||
BRIG_MEMORY_FENCE_SEGMENT_GROUP = 1,
|
||||
BRIG_MEMORY_FENCE_SEGMENT_IMAGE = 2,
|
||||
BRIG_MEMORY_FENCE_SEGMENT_LAST = 3 //.skip
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // defined(INTERNAL_BRIG_H)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,53 +0,0 @@
|
||||
# -*- mode:python -*-
|
||||
|
||||
# Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# For use for simulation and test purposes only
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Author: Anthony Gutierrez
|
||||
#
|
||||
|
||||
Import('*')
|
||||
|
||||
if not env['BUILD_GPU']:
|
||||
Return()
|
||||
|
||||
if env['TARGET_GPU_ISA'] == 'hsail':
|
||||
env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'],
|
||||
'gen.py', '$SOURCE $TARGETS')
|
||||
|
||||
Source('gpu_decoder.cc')
|
||||
Source('insts/branch.cc')
|
||||
Source('insts/gen_exec.cc')
|
||||
Source('insts/gpu_static_inst.cc')
|
||||
Source('insts/main.cc')
|
||||
Source('insts/pseudo_inst.cc')
|
||||
Source('insts/mem.cc')
|
||||
Source('operand.cc')
|
||||
@@ -1,40 +0,0 @@
|
||||
# -*- mode:python -*-
|
||||
|
||||
#
|
||||
# Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# For use for simulation and test purposes only
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Author: Anthony Gutierrez
|
||||
#
|
||||
|
||||
Import('*')
|
||||
|
||||
all_gpu_isa_list.append('hsail')
|
||||
@@ -1,912 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# For use for simulation and test purposes only
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Author: Steve Reinhardt
|
||||
#
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import sys, re
|
||||
|
||||
from m5.util import code_formatter
|
||||
|
||||
# Exactly three file names must follow the script name on the command line
# (sys.argv[0] is the script itself, hence the comparison against 4).
if len(sys.argv) != 4:
    # Report on stderr and exit with a non-zero status so the invoking
    # build step sees the failure; exiting with 0 would make a bad
    # invocation look like a successful run that produced no output files.
    print("Error: need 3 args (file names)", file=sys.stderr)
    sys.exit(1)
|
||||
|
||||
header_code = code_formatter()
|
||||
decoder_code = code_formatter()
|
||||
exec_code = code_formatter()
|
||||
|
||||
###############
|
||||
#
|
||||
# Generate file prologs (includes etc.)
|
||||
#
|
||||
###############
|
||||
|
||||
header_code('''
|
||||
#include "arch/hsail/insts/decl.hh"
|
||||
#include "base/bitfield.hh"
|
||||
#include "gpu-compute/hsail_code.hh"
|
||||
#include "gpu-compute/wavefront.hh"
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
''')
|
||||
header_code.indent()
|
||||
|
||||
decoder_code('''
|
||||
#include "arch/hsail/gpu_decoder.hh"
|
||||
#include "arch/hsail/insts/branch.hh"
|
||||
#include "arch/hsail/insts/decl.hh"
|
||||
#include "arch/hsail/insts/gen_decl.hh"
|
||||
#include "arch/hsail/insts/mem.hh"
|
||||
#include "arch/hsail/insts/mem_impl.hh"
|
||||
#include "gpu-compute/brig_object.hh"
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
std::vector<GPUStaticInst*> Decoder::decodedInsts;
|
||||
|
||||
GPUStaticInst*
|
||||
Decoder::decode(MachInst machInst)
|
||||
{
|
||||
using namespace Brig;
|
||||
|
||||
const BrigInstBase *ib = machInst.brigInstBase;
|
||||
const BrigObject *obj = machInst.brigObj;
|
||||
|
||||
switch(ib->opcode) {
|
||||
''')
|
||||
decoder_code.indent()
|
||||
decoder_code.indent()
|
||||
|
||||
exec_code('''
|
||||
#include "arch/hsail/insts/gen_decl.hh"
|
||||
#include "base/intmath.hh"
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
''')
|
||||
exec_code.indent()
|
||||
|
||||
###############
|
||||
#
|
||||
# Define code templates for class declarations (for header file)
|
||||
#
|
||||
###############
|
||||
|
||||
# Basic header template for an instruction stub.
|
||||
header_template_stub = '''
|
||||
class $class_name : public $base_class
|
||||
{
|
||||
public:
|
||||
typedef $base_class Base;
|
||||
|
||||
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
|
||||
: Base(ib, obj, "$opcode")
|
||||
{
|
||||
}
|
||||
|
||||
void execute(GPUDynInstPtr gpuDynInst);
|
||||
};
|
||||
|
||||
'''
|
||||
|
||||
# Basic header template for an instruction with no template parameters.
|
||||
header_template_nodt = '''
|
||||
class $class_name : public $base_class
|
||||
{
|
||||
public:
|
||||
typedef $base_class Base;
|
||||
|
||||
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
|
||||
: Base(ib, obj, "$opcode")
|
||||
{
|
||||
}
|
||||
|
||||
void execute(GPUDynInstPtr gpuDynInst);
|
||||
};
|
||||
|
||||
'''
|
||||
|
||||
# Basic header template for an instruction with a single DataType
|
||||
# template parameter.
|
||||
header_template_1dt = '''
|
||||
template<typename DataType>
|
||||
class $class_name : public $base_class<DataType>
|
||||
{
|
||||
public:
|
||||
typedef $base_class<DataType> Base;
|
||||
typedef typename DataType::CType CType;
|
||||
|
||||
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
|
||||
: Base(ib, obj, "$opcode")
|
||||
{
|
||||
}
|
||||
|
||||
void execute(GPUDynInstPtr gpuDynInst);
|
||||
};
|
||||
|
||||
'''
|
||||
|
||||
header_template_1dt_noexec = '''
|
||||
template<typename DataType>
|
||||
class $class_name : public $base_class<DataType>
|
||||
{
|
||||
public:
|
||||
typedef $base_class<DataType> Base;
|
||||
typedef typename DataType::CType CType;
|
||||
|
||||
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
|
||||
: Base(ib, obj, "$opcode")
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
'''
|
||||
|
||||
# Same as header_template_1dt, except the base class has a second
|
||||
# template parameter NumSrcOperands to allow a variable number of
|
||||
# source operands. Note that since this is implemented with an array,
|
||||
# it only works for instructions where all sources are of the same
|
||||
# type (like most arithmetics).
|
||||
header_template_1dt_varsrcs = '''
|
||||
template<typename DataType>
|
||||
class $class_name : public $base_class<DataType, $num_srcs>
|
||||
{
|
||||
public:
|
||||
typedef $base_class<DataType, $num_srcs> Base;
|
||||
typedef typename DataType::CType CType;
|
||||
|
||||
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
|
||||
: Base(ib, obj, "$opcode")
|
||||
{
|
||||
}
|
||||
|
||||
void execute(GPUDynInstPtr gpuDynInst);
|
||||
};
|
||||
|
||||
'''
|
||||
|
||||
# Header template for instruction with two DataType template
|
||||
# parameters, one for the dest and one for the source. This is used
|
||||
# by compare and convert.
|
||||
header_template_2dt = '''
|
||||
template<typename DestDataType, class SrcDataType>
|
||||
class $class_name : public $base_class<DestDataType, SrcDataType>
|
||||
{
|
||||
public:
|
||||
typedef $base_class<DestDataType, SrcDataType> Base;
|
||||
typedef typename DestDataType::CType DestCType;
|
||||
typedef typename SrcDataType::CType SrcCType;
|
||||
|
||||
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
|
||||
: Base(ib, obj, "$opcode")
|
||||
{
|
||||
}
|
||||
|
||||
void execute(GPUDynInstPtr gpuDynInst);
|
||||
};
|
||||
|
||||
'''
|
||||
|
||||
header_templates = {
|
||||
'ArithInst': header_template_1dt_varsrcs,
|
||||
'CmovInst': header_template_1dt,
|
||||
'ClassInst': header_template_1dt,
|
||||
'ShiftInst': header_template_1dt,
|
||||
'ExtractInsertInst': header_template_1dt,
|
||||
'CmpInst': header_template_2dt,
|
||||
'CvtInst': header_template_2dt,
|
||||
'PopcountInst': header_template_2dt,
|
||||
'LdInst': '',
|
||||
'StInst': '',
|
||||
'SpecialInstNoSrc': header_template_nodt,
|
||||
'SpecialInst1Src': header_template_nodt,
|
||||
'SpecialInstNoSrcNoDest': '',
|
||||
'Stub': header_template_stub,
|
||||
}
|
||||
|
||||
###############
|
||||
#
|
||||
# Define code templates for exec functions
|
||||
#
|
||||
###############
|
||||
|
||||
# exec function body
|
||||
exec_template_stub = '''
|
||||
void
|
||||
$class_name::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble());
|
||||
}
|
||||
|
||||
'''
|
||||
exec_template_nodt_nosrc = '''
|
||||
void
|
||||
$class_name::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
typedef Base::DestCType DestCType;
|
||||
|
||||
const VectorMask &mask = w->getPred();
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
DestCType dest_val = $expr;
|
||||
this->dest.set(w, lane, dest_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
'''
|
||||
|
||||
exec_template_nodt_1src = '''
|
||||
void
|
||||
$class_name::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
typedef Base::DestCType DestCType;
|
||||
typedef Base::SrcCType SrcCType;
|
||||
|
||||
const VectorMask &mask = w->getPred();
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
|
||||
DestCType dest_val = $expr;
|
||||
|
||||
this->dest.set(w, lane, dest_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
'''
|
||||
|
||||
exec_template_1dt_varsrcs = '''
|
||||
template<typename DataType>
|
||||
void
|
||||
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
const VectorMask &mask = w->getPred();
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
CType dest_val;
|
||||
if ($dest_is_src_flag) {
|
||||
dest_val = this->dest.template get<CType>(w, lane);
|
||||
}
|
||||
|
||||
CType src_val[$num_srcs];
|
||||
|
||||
for (int i = 0; i < $num_srcs; ++i) {
|
||||
src_val[i] = this->src[i].template get<CType>(w, lane);
|
||||
}
|
||||
|
||||
dest_val = (CType)($expr);
|
||||
|
||||
this->dest.set(w, lane, dest_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
'''
|
||||
|
||||
exec_template_1dt_3srcs = '''
|
||||
template<typename DataType>
|
||||
void
|
||||
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
typedef typename Base::Src0CType Src0T;
|
||||
typedef typename Base::Src1CType Src1T;
|
||||
typedef typename Base::Src2CType Src2T;
|
||||
|
||||
const VectorMask &mask = w->getPred();
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
CType dest_val;
|
||||
|
||||
if ($dest_is_src_flag) {
|
||||
dest_val = this->dest.template get<CType>(w, lane);
|
||||
}
|
||||
|
||||
Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
|
||||
Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
|
||||
Src2T src_val2 = this->src2.template get<Src2T>(w, lane);
|
||||
|
||||
dest_val = $expr;
|
||||
|
||||
this->dest.set(w, lane, dest_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
'''
|
||||
|
||||
exec_template_1dt_2src_1dest = '''
|
||||
template<typename DataType>
|
||||
void
|
||||
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
typedef typename Base::DestCType DestT;
|
||||
typedef CType Src0T;
|
||||
typedef typename Base::Src1CType Src1T;
|
||||
|
||||
const VectorMask &mask = w->getPred();
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
DestT dest_val;
|
||||
if ($dest_is_src_flag) {
|
||||
dest_val = this->dest.template get<DestT>(w, lane);
|
||||
}
|
||||
Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
|
||||
Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
|
||||
|
||||
dest_val = $expr;
|
||||
|
||||
this->dest.set(w, lane, dest_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
'''
|
||||
|
||||
exec_template_shift = '''
|
||||
template<typename DataType>
|
||||
void
|
||||
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
const VectorMask &mask = w->getPred();
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
CType dest_val;
|
||||
|
||||
if ($dest_is_src_flag) {
|
||||
dest_val = this->dest.template get<CType>(w, lane);
|
||||
}
|
||||
|
||||
CType src_val0 = this->src0.template get<CType>(w, lane);
|
||||
uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);
|
||||
|
||||
dest_val = $expr;
|
||||
|
||||
this->dest.set(w, lane, dest_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
'''
|
||||
|
||||
exec_template_2dt = '''
|
||||
template<typename DestDataType, class SrcDataType>
|
||||
void
|
||||
$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
const VectorMask &mask = w->getPred();
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
DestCType dest_val;
|
||||
SrcCType src_val[$num_srcs];
|
||||
|
||||
for (int i = 0; i < $num_srcs; ++i) {
|
||||
src_val[i] = this->src[i].template get<SrcCType>(w, lane);
|
||||
}
|
||||
|
||||
dest_val = $expr;
|
||||
|
||||
this->dest.set(w, lane, dest_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
'''
|
||||
|
||||
exec_templates = {
|
||||
'ArithInst': exec_template_1dt_varsrcs,
|
||||
'CmovInst': exec_template_1dt_3srcs,
|
||||
'ExtractInsertInst': exec_template_1dt_3srcs,
|
||||
'ClassInst': exec_template_1dt_2src_1dest,
|
||||
'CmpInst': exec_template_2dt,
|
||||
'CvtInst': exec_template_2dt,
|
||||
'PopcountInst': exec_template_2dt,
|
||||
'LdInst': '',
|
||||
'StInst': '',
|
||||
'SpecialInstNoSrc': exec_template_nodt_nosrc,
|
||||
'SpecialInst1Src': exec_template_nodt_1src,
|
||||
'SpecialInstNoSrcNoDest': '',
|
||||
'Stub': exec_template_stub,
|
||||
}
|
||||
|
||||
###############
|
||||
#
|
||||
# Define code templates for the decoder cases
|
||||
#
|
||||
###############
|
||||
|
||||
# decode template for nodt-opcode case
|
||||
decode_nodt_template = '''
|
||||
case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''
|
||||
|
||||
decode_case_prolog_class_inst = '''
|
||||
case BRIG_OPCODE_$brig_opcode_upper:
|
||||
{
|
||||
//const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
|
||||
BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
|
||||
//switch (baseOp->kind) {
|
||||
// case BRIG_OPERAND_REG:
|
||||
// type = ((const BrigOperandReg*)baseOp)->type;
|
||||
// break;
|
||||
// case BRIG_OPERAND_IMMED:
|
||||
// type = ((const BrigOperandImmed*)baseOp)->type;
|
||||
// break;
|
||||
// default:
|
||||
// fatal("CLASS unrecognized kind of operand %d\\n",
|
||||
// baseOp->kind);
|
||||
//}
|
||||
switch (type) {'''
|
||||
|
||||
# common prolog for 1dt- or 2dt-opcode case: switch on data type
|
||||
decode_case_prolog = '''
|
||||
case BRIG_OPCODE_$brig_opcode_upper:
|
||||
{
|
||||
switch (ib->type) {'''
|
||||
|
||||
# single-level decode case entry (for 1dt opcodes)
|
||||
decode_case_entry = \
|
||||
' case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
|
||||
|
||||
decode_store_prolog = \
|
||||
' case BRIG_TYPE_$type_name: {'
|
||||
|
||||
decode_store_case_epilog = '''
|
||||
}'''
|
||||
|
||||
decode_store_case_entry = \
|
||||
' return $constructor(ib, obj);'
|
||||
|
||||
# common epilog for type switch
|
||||
decode_case_epilog = '''
|
||||
default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
|
||||
ib->type);
|
||||
}
|
||||
}
|
||||
break;'''
|
||||
|
||||
# Additional templates for nested decode on a second type field (for
|
||||
# compare and convert). These are used in place of the
|
||||
# decode_case_entry template to create a second-level switch on the
|
||||
# second type field inside each case of the first-level type switch.
|
||||
# Because the name and location of the second type can vary, the Brig
|
||||
# instruction type must be provided in $brig_type, and the name of the
|
||||
# second type field must be provided in $type_field.
|
||||
decode_case2_prolog = '''
|
||||
case BRIG_TYPE_$type_name:
|
||||
switch (((Brig$brig_type*)ib)->$type2_field) {'''
|
||||
|
||||
decode_case2_entry = \
|
||||
' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
|
||||
|
||||
decode_case2_epilog = '''
|
||||
default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
|
||||
((Brig$brig_type*)ib)->$type2_field);
|
||||
}
|
||||
break;'''
|
||||
|
||||
# Figure out how many source operands an expr needs by looking for the
|
||||
# highest-numbered srcN value referenced. Since sources are numbered
|
||||
# starting at 0, the return value is N+1.
|
||||
def num_src_operands(expr):
    """Return how many source operands the expression string needs.

    Sources are numbered from 0, so the result is one more than the
    highest srcN referenced in expr, or 0 when no srcN appears.

    The scan uses the same word-bounded pattern that gen() uses when it
    rewrites srcN references, so the count always covers every operand
    that gen() will substitute (including indices above 2) and is not
    fooled by identifiers that merely contain "srcN" as a substring.
    """
    indices = [int(digit) for digit in re.findall(r'\bsrc(\d)\b', expr)]
    if not indices:
        return 0
    return max(indices) + 1
|
||||
|
||||
###############
|
||||
#
|
||||
# Define final code generation methods
|
||||
#
|
||||
# The gen() and gen_special() functions are the interface for
|
||||
# generating actual instructions.
|
||||
#
|
||||
###############
|
||||
|
||||
# Generate class declaration, exec function, and decode switch case
|
||||
# for a brig_opcode with a single-level type switch. The 'types'
|
||||
# parameter is a list or tuple of types for which the instruction
|
||||
# should be instantiated.
|
||||
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit the class declaration, exec function and decoder case(s) for
    one opcode.

    brig_opcode        -- opcode name; also used as the generated class name.
    types              -- list/tuple of type names to instantiate for. When
                          empty/None or a plain string, a single untyped
                          decoder case is emitted.
    expr               -- expression string written in terms of src0..srcN
                          and dest; required when base_class is 'ArithInst'.
    base_class         -- selects the header/exec templates; may carry
                          template arguments, which are stripped for lookup.
    type2_info         -- optional (field_name, types) pair that requests a
                          nested switch on a second type field.
    constructor_prefix -- text placed in front of the class name in each
                          decoder case.
    is_store           -- when true (and type2_info is not given), emit the
                          store-style decoder cases.

    NOTE(review): the templates are passed to header_code / exec_code /
    decoder_code without arguments, so their $name placeholders are
    presumably resolved from this function's local variables -- confirm in
    m5.util.code_formatter. The local names below are therefore kept
    exactly as they were, even where Python itself never reads them.
    """
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # expr must be provided with ArithInst so that num_srcs can be
        # derived for the template
        assert expr

    if expr:
        # Derive several bits of info from expr. If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # If the RHS expression mentions 'dest', the instruction does a
        # read-modify-write on the register, so dest is also a source.
        dest_is_src = 'dest' in expr
        dest_is_src_flag = str(dest_is_src).lower()  # for C++
        if base_class in ['ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst']:
            # these templates keep their sources in an array
            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
        else:
            # every other base class (ShiftInst included) uses one scalar
            # variable per source
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of the base class before looking up
    # the appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
        return

    # Multiple types: need at least one level of decode
    if brig_opcode == 'Class':
        decoder_code(decode_case_prolog_class_inst)
    else:
        decoder_code(decode_case_prolog)

    if type2_info:
        # Need a secondary type switch (convert, compare); unpack the
        # extra info describing the second switch
        (type2_field, types2) = type2_info
        brig_type = 'Inst%s' % brig_opcode
        for type_name in types:
            decoder_code(decode_case2_prolog)
            fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
            for type2_name in types2:
                full_class_name = fmt % type2_name.upper()
                constructor = constructor_prefix + full_class_name
                decoder_code(decode_case2_entry)

            decoder_code(decode_case2_epilog)
    elif is_store:
        # Single list of types, one braced case per type
        for type_name in types:
            decoder_code(decode_store_prolog)
            type_size = int(re.findall(r'[0-9]+', type_name)[0])
            type_type = type_name[0]
            full_class_name = '%s<%s,%s>' % (
                class_name, type_name.upper(),
                '%s%d' % (type_type.upper(), type_size))
            constructor = constructor_prefix + full_class_name
            decoder_code(decode_store_case_entry)
            decoder_code(decode_store_case_epilog)
    else:
        # Single list of types, basic one-level decode
        for type_name in types:
            full_class_name = '%s<%s>' % (class_name, type_name.upper())
            constructor = constructor_prefix + full_class_name
            decoder_code(decode_case_entry)

    decoder_code(decode_case_epilog)
|
||||
|
||||
###############
|
||||
#
|
||||
# Generate instructions
|
||||
#
|
||||
###############
|
||||
|
||||
# handy abbreviations for common sets of types
|
||||
|
||||
# arithmetic ops are typically defined only on 32- and 64-bit sizes
|
||||
arith_int_types = ('S32', 'U32', 'S64', 'U64')
|
||||
arith_float_types = ('F32', 'F64')
|
||||
arith_types = arith_int_types + arith_float_types
|
||||
|
||||
bit_types = ('B1', 'B32', 'B64')
|
||||
|
||||
all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types
|
||||
|
||||
# I think you might be able to do 'f16' memory ops too, but we'll
|
||||
# ignore them for now.
|
||||
mem_types = all_int_types + arith_float_types
|
||||
mem_atom_types = all_int_types + ('B32', 'B64')
|
||||
|
||||
##### Arithmetic & logical operations
|
||||
gen('Add', arith_types, 'src0 + src1')
|
||||
gen('Sub', arith_types, 'src0 - src1')
|
||||
# Arithmetic, bitwise and data-movement instructions.
#
# Every row below is the exact positional argument list of one gen() call:
# (opcode, operand types, C++ expression[, base class[, second-type info]]).
# Rows are registered strictly in the order listed, because gen() appends
# to the generated files as it goes.
_hsail_arith_and_bit_ops = (
    ('Mul', arith_types, 'src0 * src1'),
    ('Div', arith_types, 'src0 / src1'),
    ('Min', arith_types, 'std::min(src0, src1)'),
    ('Max', arith_types, 'std::max(src0, src1)'),
    ('Gcnmin', arith_types, 'std::min(src0, src1)'),

    ('CopySign', arith_float_types,
     'src1 < 0 ? -std::abs(src0) : std::abs(src0)'),
    ('Sqrt', arith_float_types, 'sqrt(src0)'),
    ('Floor', arith_float_types, 'floor(src0)'),

    # The "native"/fast variants are modelled exactly like the precise ones.
    ('Nsqrt', arith_float_types, 'sqrt(src0)'),
    ('Nrsqrt', arith_float_types, '1.0/sqrt(src0)'),
    ('Nrcp', arith_float_types, '1.0/src0'),
    # NOTE(review): for negative inputs this evaluates floor(src0)-src0,
    # which is <= 0 -- confirm that is the intended fract result.
    ('Fract', arith_float_types,
     '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)'),

    ('Ncos', arith_float_types, 'cos(src0)'),
    ('Nsin', arith_float_types, 'sin(src0)'),

    ('And', bit_types, 'src0 & src1'),
    ('Or', bit_types, 'src0 | src1'),
    ('Xor', bit_types, 'src0 ^ src1'),

    ('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)'),
    ('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst',
     ('sourceType', ('B32', 'B64'))),

    ('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst'),
    ('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst'),

    # Not generated yet:
    #   gen('Mul_hi', types=('s32','u32', '??'))
    #   gen('Mul24', types=('s32','u32', '??'))
    ('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)'),

    ('Abs', arith_types, 'std::abs(src0)'),
    ('Neg', arith_types, '-src0'),

    ('Mov', bit_types + arith_types, 'src0'),
    ('Not', bit_types, 'heynot(src0)'),

    # mad and fma differ only in rounding behaviour, which is not emulated.
    # mad additionally has an integer form; fma does not.
    ('Mad', arith_types, 'src0 * src1 + src2'),
    ('Fma', arith_float_types, 'src0 * src1 + src2'),

    # Native floating-point fused multiply-add.
    ('Nfma', arith_float_types, 'src0 * src1 + src2'),

    ('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst'),
    ('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))'),
    ('ByteAlign', bit_types,
     '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))'),

    # bits() and insertBits() come from base/bitfield.hh.
    ('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
     'ExtractInsertInst'),
    ('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
     'ExtractInsertInst'),
)

for _hsail_gen_args in _hsail_arith_and_bit_ops:
    gen(*_hsail_gen_args)
|
||||
|
||||
##### Compare

# The compare destination types are listed explicitly; the source operand
# may be any arithmetic or bit type.
_hsail_cmp_source_types = ('sourceType', arith_types + bit_types)
gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
    'CmpInst', _hsail_cmp_source_types)
gen('Class', arith_float_types, 'fpclassify(src0,src1)', 'ClassInst')

##### Conversion

# Among the bit types only B1 takes part in conversions (not B32 or B64).
cvt_types = ('B1',) + mem_types

gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))

##### Load & Store

# All of the instructions below are built through a decode* factory
# (constructor_prefix='decode') instead of a plain constructor call.
for _hsail_opcode in ('Lda', 'Ld'):
    gen(_hsail_opcode, mem_types, base_class='LdInst',
        constructor_prefix='decode')

gen('St', mem_types, base_class='StInst', constructor_prefix='decode',
    is_store=True)

for _hsail_opcode in ('Atomic', 'AtomicNoRet'):
    gen(_hsail_opcode, mem_atom_types, base_class='StInst',
        constructor_prefix='decode')

# Branches take no type list; they are registered with the same
# base_class/constructor_prefix arguments as the loads above.
for _hsail_opcode in ('Cbr', 'Br'):
    gen(_hsail_opcode, base_class='LdInst', constructor_prefix='decode')
|
||||
|
||||
##### Special operations
|
||||
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Register a "special" instruction whose value comes from `expr`.

    The base class is chosen from the number of source operands that
    `expr` references, as reported by num_src_operands():

      * 0 sources -> SpecialInstNoSrc<dest_type>
      * 1 source  -> SpecialInst1Src<dest_type>

    brig_opcode -- opcode name forwarded to gen()
    expr        -- C++ expression that computes the destination value
    dest_type   -- destination type name used as the template argument

    Raises AssertionError if `expr` uses more than one source operand.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # Raise explicitly: an assert statement would be stripped under
        # "python -O", letting execution continue with base_class unbound.
        raise AssertionError(
            'special instruction %s uses %d source operands; '
            'only 0 or 1 are supported' % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)
|
||||
|
||||
# Special value-producing instructions. Each row is the positional argument
# list of one gen_special() call: (opcode, expression[, destination type]).
_hsail_special_ops = (
    ('WorkItemId', 'w->workItemId[src0][lane]'),
    ('WorkItemAbsId',
     'w->workItemId[src0][lane] + '
     '(w->workGroupId[src0] * w->workGroupSz[src0])'),
    ('WorkGroupId', 'w->workGroupId[src0]'),
    ('WorkGroupSize', 'w->workGroupSz[src0]'),
    ('CurrentWorkGroupSize', 'w->workGroupSz[src0]'),
    ('GridSize', 'w->gridSz[src0]'),
    ('GridGroups', 'divCeil(w->gridSz[src0],w->workGroupSz[src0])'),
    ('LaneId', 'lane'),
    ('WaveId', 'w->wfId'),
    ('Clock', 'w->computeUnit->shader->tick_cnt', 'U64'),
    # A 'CU' special instruction was sketched here but never defined.
)

for _hsail_special_args in _hsail_special_ops:
    gen_special(*_hsail_special_args)

# Instructions with neither sources nor a destination.
#
# 'Call' is how magic instructions (defined in magic.hh) are mapped onto
# the BrigSyscall opcode. Real HSA kernel system calls could be added in
# the future and coexist with the magic instructions.
for _hsail_opcode in ('Ret', 'Barrier', 'MemFence', 'Call'):
    gen(_hsail_opcode, base_class='SpecialInstNoSrcNoDest')
|
||||
|
||||
# Placeholder ("Stub") instructions.
#
# These opcodes are only matched together with their operands; they may
# need a real implementation at some point. Defining stubs lets us run
# applications that contain them in dead or unused code paths.
#
# This is needed for rocm-hcc compilations for HSA backends, because the
# builtins-hsail library is `cat`d onto the generated kernels. That
# library consists of hand-coded hsail functions that __might__ be needed
# by the rocm-hcc compiler in certain binaries.
_hsail_stub_opcodes = (
    'Bitmask',
    'Bitrev',
    'Firstbit',
    'Lastbit',
    'Unpacklo',
    'Unpackhi',
    'Pack',
    'Unpack',
    'Lerp',
    'Packcvt',
    'Unpackcvt',
    'Sad',
    'Sadhi',
    'Activelanecount',
    'Activelaneid',
    'Activelanemask',
    'Activelanepermute',
    'Groupbaseptr',
    'Signalnoret',
)

for _hsail_opcode in _hsail_stub_opcodes:
    gen(_hsail_opcode, base_class='Stub')
|
||||
|
||||
###############
#
# Emit the closing section of each generated file
#
###############

# Explicit Abs specializations for the two unsigned types: the emitted
# execute() copies the source value to the destination unchanged for
# every lane that is enabled in the wavefront's predicate mask.
header_code('''
template<>
inline void
Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();

const VectorMask &mask = w->getPred();

for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
CType src_val;

src_val = this->src[0].template get<CType>(w, lane);

dest_val = (CType)(src_val);

this->dest.set(w, lane, dest_val);
}
}
}

template<>
inline void
Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();

const VectorMask &mask = w->getPred();

for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
CType src_val;

src_val = this->src[0].template get<CType>(w, lane);

dest_val = (CType)(src_val);

this->dest.set(w, lane, dest_val);
}
}
}
''')

# Close the namespace in the header.
header_code.dedent()
header_code('''
} // namespace HsailISA
''')

# Close the main decode switch: two indent levels are dropped before the
# default case and the closing braces are emitted.
for _hsail_level in range(2):
    decoder_code.dedent()
decoder_code('''
default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
} // end switch(ib->opcode)
} // end decode()
} // namespace HsailISA
''')

# Close the namespace in the exec file.
exec_code.dedent()
exec_code('''
} // namespace HsailISA
''')

###############
#
# Write the accumulated code out
#
###############

# Output paths come from the command line in this fixed order:
# argv[1] = header, argv[2] = decoder, argv[3] = exec.
_hsail_outputs = (header_code, decoder_code, exec_code)
for _hsail_argv_idx, _hsail_formatter in enumerate(_hsail_outputs, 1):
    _hsail_formatter.write(sys.argv[_hsail_argv_idx])
|
||||
@@ -1,77 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_HSAIL_GPU_DECODER_HH__
|
||||
#define __ARCH_HSAIL_GPU_DECODER_HH__
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "arch/hsail/gpu_types.hh"
|
||||
|
||||
class BrigObject;
|
||||
class GPUStaticInst;
|
||||
|
||||
namespace Brig
|
||||
{
|
||||
class BrigInstBase;
|
||||
}
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
class Decoder
|
||||
{
|
||||
public:
|
||||
GPUStaticInst* decode(MachInst machInst);
|
||||
|
||||
GPUStaticInst*
|
||||
decode(RawMachInst inst)
|
||||
{
|
||||
return inst < decodedInsts.size() ? decodedInsts.at(inst) : nullptr;
|
||||
}
|
||||
|
||||
RawMachInst
|
||||
saveInst(GPUStaticInst *decodedInst)
|
||||
{
|
||||
decodedInsts.push_back(decodedInst);
|
||||
|
||||
return decodedInsts.size() - 1;
|
||||
}
|
||||
|
||||
private:
|
||||
static std::vector<GPUStaticInst*> decodedInsts;
|
||||
};
|
||||
} // namespace HsailISA
|
||||
|
||||
#endif // __ARCH_HSAIL_GPU_DECODER_HH__
|
||||
@@ -1,75 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_HSAIL_GPU_ISA_HH__
|
||||
#define __ARCH_HSAIL_GPU_ISA_HH__
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "arch/hsail/gpu_types.hh"
|
||||
#include "base/logging.hh"
|
||||
#include "base/types.hh"
|
||||
#include "gpu-compute/misc.hh"
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
class GPUISA
|
||||
{
|
||||
public:
|
||||
GPUISA()
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
writeMiscReg(int opIdx, RegVal operandVal)
|
||||
{
|
||||
fatal("HSAIL does not implement misc registers yet\n");
|
||||
}
|
||||
|
||||
RegVal
|
||||
readMiscReg(int opIdx) const
|
||||
{
|
||||
fatal("HSAIL does not implement misc registers yet\n");
|
||||
}
|
||||
|
||||
bool hasScalarUnit() const { return false; }
|
||||
|
||||
uint32_t
|
||||
advancePC(uint32_t old_pc, GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
return old_pc + sizeof(RawMachInst);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // __ARCH_HSAIL_GPU_ISA_HH__
|
||||
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_HSAIL_GPU_TYPES_HH__
|
||||
#define __ARCH_HSAIL_GPU_TYPES_HH__
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// Forward declarations: only pointers to these types are used below.
namespace Brig
{
    class BrigInstBase;
}

class BrigObject;

namespace HsailISA
{
    // The raw bits our model uses to stand for an instruction. For HSAIL
    // this is simply an index into a list of instruction objects.
    using RawMachInst = uint32_t;

    // A richer view of an instruction than the raw machine code alone.
    // For HSAIL the machine code proper is a BrigInstBase, while the
    // BrigObject carries further information related to operands, etc.
    struct MachInst
    {
        const Brig::BrigInstBase *brigInstBase;
        const BrigObject *brigObj;
    };
}
|
||||
|
||||
#endif // __ARCH_HSAIL_GPU_TYPES_HH__
|
||||
@@ -1,86 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#include "arch/hsail/insts/branch.hh"
|
||||
|
||||
#include "gpu-compute/hsail_code.hh"
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
/**
 * Build the static instruction for a "brn" branch.
 *
 * The branch is indirect when its first operand is a register operand and
 * direct otherwise.
 *
 * @param ib  BRIG instruction being decoded.
 * @param obj BRIG object used to resolve the operand.
 * @return a newly allocated BrnIndirectInst or BrnDirectInst.
 */
GPUStaticInst*
decodeBrn(const Brig::BrigInstBase *ib, const BrigObject *obj)
{
    const unsigned target_offs = obj->getOperandPtr(ib->operands, 0);
    const Brig::BrigOperand *target_op = obj->getOperand(target_offs);

    const bool is_indirect =
        target_op->kind == Brig::BRIG_KIND_OPERAND_REGISTER;

    if (is_indirect) {
        return new BrnIndirectInst(ib, obj);
    }

    return new BrnDirectInst(ib, obj);
}
|
||||
|
||||
/**
 * Build the static instruction for a conditional "cbr" branch.
 *
 * Operand 0 is the condition; the branch is indirect when operand 1, the
 * target, is a register operand and direct otherwise.
 *
 * @param ib  BRIG instruction being decoded.
 * @param obj BRIG object used to resolve the operand.
 * @return a newly allocated CbrIndirectInst or CbrDirectInst.
 */
GPUStaticInst*
decodeCbr(const Brig::BrigInstBase *ib, const BrigObject *obj)
{
    const unsigned target_offs = obj->getOperandPtr(ib->operands, 1);
    const Brig::BrigOperand *target_op = obj->getOperand(target_offs);

    const bool is_indirect =
        target_op->kind == Brig::BRIG_KIND_OPERAND_REGISTER;

    if (is_indirect) {
        return new CbrIndirectInst(ib, obj);
    }

    return new CbrDirectInst(ib, obj);
}
|
||||
|
||||
/**
 * Build the static instruction for an unconditional "br" branch.
 *
 * The branch is indirect when its target operand is a register operand
 * and direct otherwise.
 *
 * @param ib  BRIG instruction being decoded.
 * @param obj BRIG object used to resolve the operand.
 * @return a newly allocated BrIndirectInst or BrDirectInst.
 */
GPUStaticInst*
decodeBr(const Brig::BrigInstBase *ib, const BrigObject *obj)
{
    // "br" has no condition operand: its only operand, the target, is
    // operand 0. That is the operand the BrInstBase constructor reads as
    // the target, so the direct/indirect test must inspect the same slot
    // (operand 1 is only meaningful for the conditional form).
    const unsigned target_offs = obj->getOperandPtr(ib->operands, 0);
    const Brig::BrigOperand *target_op = obj->getOperand(target_offs);

    if (target_op->kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
        return new BrIndirectInst(ib, obj);
    }

    return new BrDirectInst(ib, obj);
}
|
||||
} // namespace HsailISA
|
||||
@@ -1,441 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
|
||||
#define __ARCH_HSAIL_INSTS_BRANCH_HH__
|
||||
|
||||
#include "arch/hsail/insts/gpu_static_inst.hh"
|
||||
#include "arch/hsail/operand.hh"
|
||||
#include "gpu-compute/gpu_dyn_inst.hh"
|
||||
#include "gpu-compute/wavefront.hh"
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
|
||||
// The main difference between a direct branch and an indirect branch
|
||||
// is whether the target is a register or a label, so we can share a
|
||||
// lot of code if we template the base implementation on that type.
|
||||
template<typename TargetType>
|
||||
class BrnInstBase : public HsailGPUStaticInst
|
||||
{
|
||||
public:
|
||||
void generateDisassembly() override;
|
||||
|
||||
Brig::BrigWidth8_t width;
|
||||
TargetType target;
|
||||
|
||||
BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
|
||||
: HsailGPUStaticInst(obj, "brn")
|
||||
{
|
||||
setFlag(Branch);
|
||||
setFlag(UnconditionalJump);
|
||||
width = ((Brig::BrigInstBr*)ib)->width;
|
||||
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
|
||||
target.init(op_offs, obj);
|
||||
}
|
||||
|
||||
uint32_t getTargetPc() override { return target.getTarget(0, 0); }
|
||||
|
||||
bool isVectorRegister(int operandIndex) override {
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
return target.isVectorRegister();
|
||||
}
|
||||
bool isCondRegister(int operandIndex) override {
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
return target.isCondRegister();
|
||||
}
|
||||
bool isScalarRegister(int operandIndex) override {
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
return target.isScalarRegister();
|
||||
}
|
||||
|
||||
bool isSrcOperand(int operandIndex) override {
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isDstOperand(int operandIndex) override {
|
||||
return false;
|
||||
}
|
||||
|
||||
int getOperandSize(int operandIndex) override {
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
return target.opSize();
|
||||
}
|
||||
|
||||
int
|
||||
getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
|
||||
{
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
return target.regIndex();
|
||||
}
|
||||
|
||||
int getNumOperands() override {
|
||||
return 1;
|
||||
}
|
||||
|
||||
void execute(GPUDynInstPtr gpuDynInst) override;
|
||||
};
|
||||
|
||||
template<typename TargetType>
|
||||
void
|
||||
BrnInstBase<TargetType>::generateDisassembly()
|
||||
{
|
||||
std::string widthClause;
|
||||
|
||||
if (width != 1) {
|
||||
widthClause = csprintf("_width(%d)", width);
|
||||
}
|
||||
|
||||
disassembly = csprintf("%s%s %s", opcode, widthClause,
|
||||
target.disassemble());
|
||||
}
|
||||
|
||||
template<typename TargetType>
|
||||
void
|
||||
BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
if (getTargetPc() == w->rpc()) {
|
||||
w->popFromReconvergenceStack();
|
||||
} else {
|
||||
// Rpc and execution mask remain the same
|
||||
w->pc(getTargetPc());
|
||||
}
|
||||
}
|
||||
|
||||
/** "brn" whose target is a label; it reports no register operands. */
class BrnDirectInst : public BrnInstBase<LabelOperand>
{
  public:
    BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
        : BrnInstBase<LabelOperand>(ib, obj)
    {}

    int
    numSrcRegOperands()
    {
        return 0;
    }

    int
    numDstRegOperands()
    {
        return 0;
    }
};
|
||||
|
||||
/** "brn" whose target is held in an SRegOperand. */
class BrnIndirectInst : public BrnInstBase<SRegOperand>
{
  public:
    BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
        : BrnInstBase<SRegOperand>(ib, obj)
    {}

    // One source register when the target is a vector register, else none.
    int
    numSrcRegOperands()
    {
        return target.isVectorRegister() ? 1 : 0;
    }

    int
    numDstRegOperands()
    {
        return 0;
    }
};
|
||||
|
||||
GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
|
||||
const BrigObject *obj);
|
||||
|
||||
template<typename TargetType>
|
||||
class CbrInstBase : public HsailGPUStaticInst
|
||||
{
|
||||
public:
|
||||
void generateDisassembly() override;
|
||||
|
||||
Brig::BrigWidth8_t width;
|
||||
CRegOperand cond;
|
||||
TargetType target;
|
||||
|
||||
CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
|
||||
: HsailGPUStaticInst(obj, "cbr")
|
||||
{
|
||||
setFlag(Branch);
|
||||
width = ((Brig::BrigInstBr *)ib)->width;
|
||||
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
|
||||
cond.init(op_offs, obj);
|
||||
op_offs = obj->getOperandPtr(ib->operands, 1);
|
||||
target.init(op_offs, obj);
|
||||
}
|
||||
|
||||
uint32_t getTargetPc() override { return target.getTarget(0, 0); }
|
||||
|
||||
void execute(GPUDynInstPtr gpuDynInst) override;
|
||||
// Assumption: Target is operand 0, Condition Register is operand 1
|
||||
bool isVectorRegister(int operandIndex) override {
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
if (!operandIndex)
|
||||
return target.isVectorRegister();
|
||||
else
|
||||
return false;
|
||||
}
|
||||
bool isCondRegister(int operandIndex) override {
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
if (!operandIndex)
|
||||
return target.isCondRegister();
|
||||
else
|
||||
return true;
|
||||
}
|
||||
bool isScalarRegister(int operandIndex) override {
|
||||
assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
|
||||
if (!operandIndex)
|
||||
return target.isScalarRegister();
|
||||
else
|
||||
return false;
|
||||
}
|
||||
bool isSrcOperand(int operandIndex) override {
|
||||
assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
|
||||
if (operandIndex == 0)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
// both Condition Register and Target are source operands
|
||||
bool isDstOperand(int operandIndex) override {
|
||||
return false;
|
||||
}
|
||||
int getOperandSize(int operandIndex) override {
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
if (!operandIndex)
|
||||
return target.opSize();
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
int
|
||||
getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
|
||||
{
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
if (!operandIndex)
|
||||
return target.regIndex();
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Operands = Target, Condition Register
|
||||
int getNumOperands() override {
|
||||
return 2;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename TargetType>
|
||||
void
|
||||
CbrInstBase<TargetType>::generateDisassembly()
|
||||
{
|
||||
std::string widthClause;
|
||||
|
||||
if (width != 1) {
|
||||
widthClause = csprintf("_width(%d)", width);
|
||||
}
|
||||
|
||||
disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
|
||||
cond.disassemble(), target.disassemble());
|
||||
}
|
||||
|
||||
template<typename TargetType>
|
||||
void
|
||||
CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
const uint32_t curr_pc M5_VAR_USED = w->pc();
|
||||
const uint32_t curr_rpc = w->rpc();
|
||||
const VectorMask curr_mask = w->execMask();
|
||||
|
||||
/**
|
||||
* TODO: can we move this pop outside the instruction, and
|
||||
* into the wavefront?
|
||||
*/
|
||||
w->popFromReconvergenceStack();
|
||||
|
||||
// immediate post-dominator instruction
|
||||
const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
|
||||
if (curr_rpc != rpc) {
|
||||
w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
|
||||
}
|
||||
|
||||
// taken branch
|
||||
const uint32_t true_pc = getTargetPc();
|
||||
VectorMask true_mask;
|
||||
for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
|
||||
}
|
||||
|
||||
// not taken branch
|
||||
const uint32_t false_pc = nextInstAddr();
|
||||
assert(true_pc != false_pc);
|
||||
if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
|
||||
VectorMask false_mask = curr_mask & ~true_mask;
|
||||
w->pushToReconvergenceStack(false_pc, rpc, false_mask);
|
||||
}
|
||||
|
||||
if (true_pc != rpc && true_mask.count()) {
|
||||
w->pushToReconvergenceStack(true_pc, rpc, true_mask);
|
||||
}
|
||||
assert(w->pc() != curr_pc);
|
||||
}
|
||||
|
||||
|
||||
/** "cbr" whose target is a label. */
class CbrDirectInst : public CbrInstBase<LabelOperand>
{
  public:
    CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
        : CbrInstBase<LabelOperand>(ib, obj)
    {}

    // The only register read by this branch is the condition register.
    // It is not stored in the VRF, so it is not counted as a
    // source-register operand even though, formally, it is one.
    int
    numSrcRegOperands()
    {
        return 0;
    }

    int
    numDstRegOperands()
    {
        return 0;
    }
};
|
||||
|
||||
/** "cbr" whose target is held in an SRegOperand. */
class CbrIndirectInst : public CbrInstBase<SRegOperand>
{
  public:
    CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
        : CbrInstBase<SRegOperand>(ib, obj)
    {}

    // The condition register is not stored in the VRF, so it is not
    // counted as a source-register operand even though, formally, it is
    // one. Only the target counts, and only when it is a vector register.
    int
    numSrcRegOperands()
    {
        return target.isVectorRegister() ? 1 : 0;
    }

    int
    numDstRegOperands()
    {
        return 0;
    }
};
|
||||
|
||||
GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
|
||||
const BrigObject *obj);
|
||||
|
||||
template<typename TargetType>
|
||||
class BrInstBase : public HsailGPUStaticInst
|
||||
{
|
||||
public:
|
||||
void generateDisassembly() override;
|
||||
|
||||
ImmOperand<uint32_t> width;
|
||||
TargetType target;
|
||||
|
||||
BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
|
||||
: HsailGPUStaticInst(obj, "br")
|
||||
{
|
||||
setFlag(Branch);
|
||||
setFlag(UnconditionalJump);
|
||||
width.init(((Brig::BrigInstBr *)ib)->width, obj);
|
||||
unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
|
||||
target.init(op_offs, obj);
|
||||
}
|
||||
|
||||
uint32_t getTargetPc() override { return target.getTarget(0, 0); }
|
||||
|
||||
void execute(GPUDynInstPtr gpuDynInst) override;
|
||||
bool isVectorRegister(int operandIndex) override {
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
return target.isVectorRegister();
|
||||
}
|
||||
bool isCondRegister(int operandIndex) override {
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
return target.isCondRegister();
|
||||
}
|
||||
bool isScalarRegister(int operandIndex) override {
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
return target.isScalarRegister();
|
||||
}
|
||||
bool isSrcOperand(int operandIndex) override {
|
||||
assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
|
||||
return true;
|
||||
}
|
||||
bool isDstOperand(int operandIndex) override { return false; }
|
||||
int getOperandSize(int operandIndex) override {
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
return target.opSize();
|
||||
}
|
||||
int
|
||||
getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
|
||||
{
|
||||
assert(operandIndex >= 0 && operandIndex < getNumOperands());
|
||||
return target.regIndex();
|
||||
}
|
||||
int getNumOperands() override { return 1; }
|
||||
};
|
||||
|
||||
template<typename TargetType>
|
||||
void
|
||||
BrInstBase<TargetType>::generateDisassembly()
|
||||
{
|
||||
std::string widthClause;
|
||||
|
||||
if (width.bits != 1) {
|
||||
widthClause = csprintf("_width(%d)", width.bits);
|
||||
}
|
||||
|
||||
disassembly = csprintf("%s%s %s", opcode, widthClause,
|
||||
target.disassemble());
|
||||
}
|
||||
|
||||
template<typename TargetType>
|
||||
void
|
||||
BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
if (getTargetPc() == w->rpc()) {
|
||||
w->popFromReconvergenceStack();
|
||||
} else {
|
||||
// Rpc and execution mask remain the same
|
||||
w->pc(getTargetPc());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * "br" whose target is held in a LabelOperand.  It reports zero source and
 * zero destination register operands.
 */
class BrDirectInst : public BrInstBase<LabelOperand>
{
  public:
    BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
        : BrInstBase<LabelOperand>(ib, obj)
    {}

    int
    numSrcRegOperands()
    {
        return 0;
    }

    int
    numDstRegOperands()
    {
        return 0;
    }
};
|
||||
|
||||
/**
 * "br" whose target is held in an SRegOperand.  The source-register count
 * is the result of target.isVectorRegister() converted to int; there are
 * no destination register operands.
 */
class BrIndirectInst : public BrInstBase<SRegOperand>
{
  public:
    BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
        : BrInstBase<SRegOperand>(ib, obj)
    {}

    int
    numSrcRegOperands()
    {
        return target.isVectorRegister();
    }

    int
    numDstRegOperands()
    {
        return 0;
    }
};
|
||||
|
||||
// Decoder hook for "br"; only the declaration is present in this header.
// NOTE(review): presumably picks between BrDirectInst and BrIndirectInst
// -- confirm against the definition.
GPUStaticInst *decodeBr(const Brig::BrigInstBase *ib, const BrigObject *obj);
|
||||
} // namespace HsailISA
|
||||
|
||||
#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,53 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#include "arch/hsail/insts/gpu_static_inst.hh"
|
||||
|
||||
#include "gpu-compute/brig_object.hh"
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
/**
 * Construct the common HSAIL static-instruction state.
 *
 * @param obj    BRIG object being decoded; its currentCode pointer is
 *               remembered in hsailCode.
 * @param opcode Opcode string, forwarded to the GPUStaticInst base.
 */
HsailGPUStaticInst::HsailGPUStaticInst(const BrigObject *obj,
                                       const std::string &opcode)
    : GPUStaticInst(opcode),
      hsailCode(obj->currentCode)
{}
|
||||
|
||||
void
|
||||
HsailGPUStaticInst::generateDisassembly()
|
||||
{
|
||||
disassembly = opcode;
|
||||
}
|
||||
} // namespace HsailISA
|
||||
@@ -1,66 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
|
||||
#define __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
|
||||
|
||||
/**
|
||||
* @file gpu_static_inst.hh
|
||||
*
|
||||
* Defines the base class representing HSAIL GPU static instructions.
|
||||
*/
|
||||
|
||||
#include "arch/hsail/gpu_types.hh"
|
||||
#include "gpu-compute/gpu_static_inst.hh"
|
||||
|
||||
class BrigObject;
|
||||
class HsailCode;
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
class HsailGPUStaticInst : public GPUStaticInst
|
||||
{
|
||||
public:
|
||||
HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode);
|
||||
void generateDisassembly() override;
|
||||
int instSize() const override { return sizeof(RawMachInst); }
|
||||
bool isValid() const override { return true; }
|
||||
|
||||
protected:
|
||||
HsailCode *hsailCode;
|
||||
};
|
||||
} // namespace HsailISA
|
||||
|
||||
#endif // __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
|
||||
@@ -1,209 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt
|
||||
*/
|
||||
|
||||
#include "arch/hsail/insts/decl.hh"
|
||||
#include "debug/GPUExec.hh"
|
||||
#include "gpu-compute/dispatcher.hh"
|
||||
#include "gpu-compute/simple_pool_manager.hh"
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
template<> const char *B1::label = "b1";
|
||||
template<> const char *B8::label = "b8";
|
||||
template<> const char *B16::label = "b16";
|
||||
template<> const char *B32::label = "b32";
|
||||
template<> const char *B64::label = "b64";
|
||||
|
||||
template<> const char *S8::label = "s8";
|
||||
template<> const char *S16::label = "s16";
|
||||
template<> const char *S32::label = "s32";
|
||||
template<> const char *S64::label = "s64";
|
||||
|
||||
template<> const char *U8::label = "u8";
|
||||
template<> const char *U16::label = "u16";
|
||||
template<> const char *U32::label = "u32";
|
||||
template<> const char *U64::label = "u64";
|
||||
|
||||
template<> const char *F32::label = "f32";
|
||||
template<> const char *F64::label = "f64";
|
||||
|
||||
const char*
|
||||
cmpOpToString(Brig::BrigCompareOperation cmpOp)
|
||||
{
|
||||
using namespace Brig;
|
||||
|
||||
switch (cmpOp) {
|
||||
case BRIG_COMPARE_EQ:
|
||||
return "eq";
|
||||
case BRIG_COMPARE_NE:
|
||||
return "ne";
|
||||
case BRIG_COMPARE_LT:
|
||||
return "lt";
|
||||
case BRIG_COMPARE_LE:
|
||||
return "le";
|
||||
case BRIG_COMPARE_GT:
|
||||
return "gt";
|
||||
case BRIG_COMPARE_GE:
|
||||
return "ge";
|
||||
case BRIG_COMPARE_EQU:
|
||||
return "equ";
|
||||
case BRIG_COMPARE_NEU:
|
||||
return "neu";
|
||||
case BRIG_COMPARE_LTU:
|
||||
return "ltu";
|
||||
case BRIG_COMPARE_LEU:
|
||||
return "leu";
|
||||
case BRIG_COMPARE_GTU:
|
||||
return "gtu";
|
||||
case BRIG_COMPARE_GEU:
|
||||
return "geu";
|
||||
case BRIG_COMPARE_NUM:
|
||||
return "num";
|
||||
case BRIG_COMPARE_NAN:
|
||||
return "nan";
|
||||
case BRIG_COMPARE_SEQ:
|
||||
return "seq";
|
||||
case BRIG_COMPARE_SNE:
|
||||
return "sne";
|
||||
case BRIG_COMPARE_SLT:
|
||||
return "slt";
|
||||
case BRIG_COMPARE_SLE:
|
||||
return "sle";
|
||||
case BRIG_COMPARE_SGT:
|
||||
return "sgt";
|
||||
case BRIG_COMPARE_SGE:
|
||||
return "sge";
|
||||
case BRIG_COMPARE_SGEU:
|
||||
return "sgeu";
|
||||
case BRIG_COMPARE_SEQU:
|
||||
return "sequ";
|
||||
case BRIG_COMPARE_SNEU:
|
||||
return "sneu";
|
||||
case BRIG_COMPARE_SLTU:
|
||||
return "sltu";
|
||||
case BRIG_COMPARE_SLEU:
|
||||
return "sleu";
|
||||
case BRIG_COMPARE_SNUM:
|
||||
return "snum";
|
||||
case BRIG_COMPARE_SNAN:
|
||||
return "snan";
|
||||
case BRIG_COMPARE_SGTU:
|
||||
return "sgtu";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Ret::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
const VectorMask &mask = w->getPred();
|
||||
|
||||
// mask off completed work-items
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
w->initMask[lane] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// delete extra instructions fetched for completed work-items
|
||||
w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
|
||||
w->instructionBuffer.end());
|
||||
if (w->pendingFetch) {
|
||||
w->dropFetch = true;
|
||||
}
|
||||
|
||||
// if all work-items have completed, then wave-front is done
|
||||
if (w->initMask.none()) {
|
||||
w->status = Wavefront::S_STOPPED;
|
||||
|
||||
int32_t refCount = w->computeUnit->getLds().
|
||||
decreaseRefCounter(w->dispatchId, w->wgId);
|
||||
|
||||
DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
|
||||
w->computeUnit->cu_id, w->wgId, refCount);
|
||||
|
||||
// free the vector registers of the completed wavefront
|
||||
w->computeUnit->vectorRegsReserved[w->simdId] -=
|
||||
w->reservedVectorRegs;
|
||||
|
||||
assert(w->computeUnit->vectorRegsReserved[w->simdId] >= 0);
|
||||
|
||||
uint32_t endIndex = (w->startVgprIndex +
|
||||
w->reservedVectorRegs - 1) %
|
||||
w->computeUnit->vrf[w->simdId]->numRegs();
|
||||
|
||||
w->computeUnit->vrf[w->simdId]->manager->
|
||||
freeRegion(w->startVgprIndex, endIndex);
|
||||
|
||||
w->reservedVectorRegs = 0;
|
||||
w->startVgprIndex = 0;
|
||||
w->computeUnit->completedWfs++;
|
||||
|
||||
DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
|
||||
w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId);
|
||||
|
||||
if (!refCount) {
|
||||
setFlag(SystemScope);
|
||||
setFlag(Release);
|
||||
setFlag(GlobalSegment);
|
||||
// Notify Memory System of Kernel Completion
|
||||
// Kernel End = isKernel + isRelease
|
||||
w->status = Wavefront::S_RETURNING;
|
||||
GPUDynInstPtr local_mempacket = gpuDynInst;
|
||||
local_mempacket->useContinuation = false;
|
||||
local_mempacket->simdId = w->simdId;
|
||||
local_mempacket->wfSlotId = w->wfSlotId;
|
||||
local_mempacket->wfDynId = w->wfDynId;
|
||||
w->computeUnit->injectGlobalMemFence(local_mempacket, true);
|
||||
} else {
|
||||
w->computeUnit->shader->dispatcher->scheduleDispatch();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Barrier::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
assert(w->barrierCnt == w->oldBarrierCnt);
|
||||
w->barrierCnt = w->oldBarrierCnt + 1;
|
||||
w->stalledAtBarrier = true;
|
||||
}
|
||||
} // namespace HsailISA
|
||||
@@ -1,76 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt
|
||||
*/
|
||||
|
||||
#include "arch/hsail/insts/mem.hh"
|
||||
|
||||
#include "arch/hsail/Brig.h"
|
||||
|
||||
using namespace Brig;
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
const char* atomicOpToString(BrigAtomicOperation brigOp);
|
||||
|
||||
const char*
|
||||
atomicOpToString(BrigAtomicOperation brigOp)
|
||||
{
|
||||
switch (brigOp) {
|
||||
case BRIG_ATOMIC_AND:
|
||||
return "and";
|
||||
case BRIG_ATOMIC_OR:
|
||||
return "or";
|
||||
case BRIG_ATOMIC_XOR:
|
||||
return "xor";
|
||||
case BRIG_ATOMIC_CAS:
|
||||
return "cas";
|
||||
case BRIG_ATOMIC_EXCH:
|
||||
return "exch";
|
||||
case BRIG_ATOMIC_ADD:
|
||||
return "add";
|
||||
case BRIG_ATOMIC_WRAPINC:
|
||||
return "inc";
|
||||
case BRIG_ATOMIC_WRAPDEC:
|
||||
return "dec";
|
||||
case BRIG_ATOMIC_MIN:
|
||||
return "min";
|
||||
case BRIG_ATOMIC_MAX:
|
||||
return "max";
|
||||
case BRIG_ATOMIC_SUB:
|
||||
return "sub";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
} // namespace HsailISA
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,648 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt
|
||||
*/
|
||||
|
||||
#include "gpu-compute/hsail_code.hh"
|
||||
|
||||
// defined in code.cc, but not worth sucking in all of code.h for this
|
||||
// at this point
|
||||
extern const char *segmentNames[];
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
template<typename DestDataType, typename AddrRegOperandType>
|
||||
void
|
||||
LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
|
||||
{
|
||||
this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
|
||||
DestDataType::label,
|
||||
this->dest.disassemble(),
|
||||
this->addr.disassemble());
|
||||
}
|
||||
|
||||
template<typename DestDataType, typename AddrRegOperandType>
|
||||
void
|
||||
LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
typedef typename DestDataType::CType CType M5_VAR_USED;
|
||||
const VectorMask &mask = w->getPred();
|
||||
std::vector<Addr> addr_vec;
|
||||
addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
|
||||
this->addr.calcVector(w, addr_vec);
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
this->dest.set(w, lane, addr_vec[lane]);
|
||||
}
|
||||
}
|
||||
addr_vec.clear();
|
||||
}
|
||||
|
||||
template<typename MemDataType, typename DestDataType,
|
||||
typename AddrRegOperandType>
|
||||
void
|
||||
LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly()
|
||||
{
|
||||
switch (num_dest_operands) {
|
||||
case 1:
|
||||
this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
|
||||
segmentNames[this->segment],
|
||||
MemDataType::label,
|
||||
this->dest.disassemble(),
|
||||
this->addr.disassemble());
|
||||
break;
|
||||
case 2:
|
||||
this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
|
||||
segmentNames[this->segment],
|
||||
MemDataType::label,
|
||||
this->dest_vect[0].disassemble(),
|
||||
this->dest_vect[1].disassemble(),
|
||||
this->addr.disassemble());
|
||||
break;
|
||||
case 3:
|
||||
this->disassembly = csprintf("%s_%s_%s (%s,%s,%s), %s", this->opcode,
|
||||
segmentNames[this->segment],
|
||||
MemDataType::label,
|
||||
this->dest_vect[0].disassemble(),
|
||||
this->dest_vect[1].disassemble(),
|
||||
this->dest_vect[2].disassemble(),
|
||||
this->addr.disassemble());
|
||||
break;
|
||||
case 4:
|
||||
this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
|
||||
this->opcode,
|
||||
segmentNames[this->segment],
|
||||
MemDataType::label,
|
||||
this->dest_vect[0].disassemble(),
|
||||
this->dest_vect[1].disassemble(),
|
||||
this->dest_vect[2].disassemble(),
|
||||
this->dest_vect[3].disassemble(),
|
||||
this->addr.disassemble());
|
||||
break;
|
||||
default:
|
||||
fatal("Bad ld register dest operand, num vector operands: %d \n",
|
||||
num_dest_operands);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Translate a per-work-item private address into the address actually
 * used for the access.
 *
 * The private spaces of the work-items of a wave-front are interleaved on
 * an 8 byte granularity: each 8-byte chunk of the private space is laid
 * out wfSize() times back to back, one copy per lane, starting at
 * w->privBase.  This does not coalesce as well as the spill-space scheme,
 * but that scheme cannot be used here because the same private address
 * may be accessed by loads/stores of different sizes.
 *
 * The largest access to private memory is assumed to be 8 bytes; a larger
 * access would need a larger stride.
 *
 * An earlier per-symbol scheme (look the address up in the private
 * space's symbol table and use that symbol's offset and size) is not used
 * because the compiler does not generate enough information for it yet.
 *
 * @param addr Address within the work-item's private space.
 * @param w    Wavefront performing the access.
 * @param lane Lane (work-item) index within the wavefront.
 * @param i    Static instruction performing the access (unused here).
 * @return The interleaved address; asserted to lie below
 *         privBase + privSizePerItem * wfSize().
 */
static Addr
calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
{
    // Split the address into its 8-byte chunk and the byte in that chunk.
    Addr addr_mod8 = addr % 8;
    Addr addr_div8 = addr / 8;

    Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 +
               addr_mod8 + w->privBase;

    assert(ret < w->privBase +
           (w->privSizePerItem * w->computeUnit->wfSize()));

    return ret;
}
|
||||
|
||||
template<typename MemDataType, typename DestDataType,
|
||||
typename AddrRegOperandType>
|
||||
void
|
||||
LdInst<MemDataType, DestDataType,
|
||||
AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
typedef typename MemDataType::CType MemCType;
|
||||
const VectorMask &mask = w->getPred();
|
||||
|
||||
// Kernarg references are handled uniquely for now (no Memory Request
|
||||
// is used), so special-case them up front. Someday we should
|
||||
// make this more realistic, at which we should get rid of this
|
||||
// block and fold this case into the switch below.
|
||||
if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
|
||||
MemCType val;
|
||||
|
||||
// I assume no vector ld for kernargs
|
||||
assert(num_dest_operands == 1);
|
||||
|
||||
// assuming for the moment that we'll never do register
|
||||
// offsets into kernarg space... just to make life simpler
|
||||
uint64_t address = this->addr.calcUniform();
|
||||
|
||||
val = *(MemCType*)&w->kernelArgs[address];
|
||||
|
||||
DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
this->dest.set(w, lane, val);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
} else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
|
||||
uint64_t address = this->addr.calcUniform();
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
MemCType val = w->readCallArgMem<MemCType>(lane, address);
|
||||
|
||||
DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
|
||||
(unsigned long long)val);
|
||||
|
||||
this->dest.set(w, lane, val);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
GPUDynInstPtr m = gpuDynInst;
|
||||
|
||||
this->addr.calcVector(w, m->addr);
|
||||
|
||||
m->m_type = MemDataType::memType;
|
||||
m->v_type = DestDataType::vgprType;
|
||||
|
||||
m->exec_mask = w->execMask();
|
||||
m->statusBitVector = 0;
|
||||
m->equiv = this->equivClass;
|
||||
|
||||
if (num_dest_operands == 1) {
|
||||
m->dst_reg = this->dest.regIndex();
|
||||
m->n_reg = 1;
|
||||
} else {
|
||||
m->n_reg = num_dest_operands;
|
||||
for (int i = 0; i < num_dest_operands; ++i) {
|
||||
m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
|
||||
}
|
||||
}
|
||||
|
||||
m->simdId = w->simdId;
|
||||
m->wfSlotId = w->wfSlotId;
|
||||
m->wfDynId = w->wfDynId;
|
||||
m->kern_id = w->kernId;
|
||||
m->cu_id = w->computeUnit->cu_id;
|
||||
m->latency.init(&w->computeUnit->shader->tick_cnt);
|
||||
|
||||
switch (this->segment) {
|
||||
case Brig::BRIG_SEGMENT_GLOBAL:
|
||||
m->pipeId = GLBMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(1));
|
||||
|
||||
// this is a complete hack to get around a compiler bug
|
||||
// (the compiler currently generates global access for private
|
||||
// addresses (starting from 0). We need to add the private offset)
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (m->addr[lane] < w->privSizePerItem) {
|
||||
if (mask[lane]) {
|
||||
// what is the size of the object we are accessing?
|
||||
// find base for for this wavefront
|
||||
|
||||
// calcPrivAddr will fail if accesses are unaligned
|
||||
assert(!((sizeof(MemCType) - 1) & m->addr[lane]));
|
||||
|
||||
Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
|
||||
this);
|
||||
|
||||
m->addr[lane] = privAddr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
w->computeUnit->globalMemoryPipe.issueRequest(m);
|
||||
w->outstandingReqsRdGm++;
|
||||
w->rdGmReqsInPipe--;
|
||||
break;
|
||||
|
||||
case Brig::BRIG_SEGMENT_SPILL:
|
||||
assert(num_dest_operands == 1);
|
||||
m->pipeId = GLBMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(1));
|
||||
{
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
// note: this calculation will NOT WORK if the compiler
|
||||
// ever generates loads/stores to the same address with
|
||||
// different widths (e.g., a ld_u32 addr and a ld_u16 addr)
|
||||
if (mask[lane]) {
|
||||
assert(m->addr[lane] < w->spillSizePerItem);
|
||||
|
||||
m->addr[lane] = m->addr[lane] * w->spillWidth +
|
||||
lane * sizeof(MemCType) + w->spillBase;
|
||||
|
||||
w->lastAddr[lane] = m->addr[lane];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
w->computeUnit->globalMemoryPipe.issueRequest(m);
|
||||
w->outstandingReqsRdGm++;
|
||||
w->rdGmReqsInPipe--;
|
||||
break;
|
||||
|
||||
case Brig::BRIG_SEGMENT_GROUP:
|
||||
m->pipeId = LDSMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(24));
|
||||
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
|
||||
w->outstandingReqsRdLm++;
|
||||
w->rdLmReqsInPipe--;
|
||||
break;
|
||||
|
||||
case Brig::BRIG_SEGMENT_READONLY:
|
||||
m->pipeId = GLBMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(1));
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
|
||||
m->addr[lane] += w->roBase;
|
||||
}
|
||||
}
|
||||
|
||||
w->computeUnit->globalMemoryPipe.issueRequest(m);
|
||||
w->outstandingReqsRdGm++;
|
||||
w->rdGmReqsInPipe--;
|
||||
break;
|
||||
|
||||
case Brig::BRIG_SEGMENT_PRIVATE:
|
||||
m->pipeId = GLBMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(1));
|
||||
{
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
assert(m->addr[lane] < w->privSizePerItem);
|
||||
|
||||
m->addr[lane] = m->addr[lane] +
|
||||
lane * sizeof(MemCType) + w->privBase;
|
||||
}
|
||||
}
|
||||
}
|
||||
w->computeUnit->globalMemoryPipe.issueRequest(m);
|
||||
w->outstandingReqsRdGm++;
|
||||
w->rdGmReqsInPipe--;
|
||||
break;
|
||||
|
||||
default:
|
||||
fatal("Load to unsupported segment %d %llxe\n", this->segment,
|
||||
m->addr[0]);
|
||||
}
|
||||
|
||||
w->outstandingReqs++;
|
||||
w->memReqsInPipe--;
|
||||
}
|
||||
|
||||
template<typename OperationType, typename SrcDataType,
|
||||
typename AddrRegOperandType>
|
||||
void
|
||||
StInst<OperationType, SrcDataType,
|
||||
AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
typedef typename OperationType::CType CType;
|
||||
|
||||
const VectorMask &mask = w->getPred();
|
||||
|
||||
// arg references are handled uniquely for now (no Memory Request
|
||||
// is used), so special-case them up front. Someday we should
|
||||
// make this more realistic, at which we should get rid of this
|
||||
// block and fold this case into the switch below.
|
||||
if (this->segment == Brig::BRIG_SEGMENT_ARG) {
|
||||
uint64_t address = this->addr.calcUniform();
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
CType data = this->src.template get<CType>(w, lane);
|
||||
DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
|
||||
w->writeCallArgMem<CType>(lane, address, data);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
GPUDynInstPtr m = gpuDynInst;
|
||||
|
||||
m->exec_mask = w->execMask();
|
||||
|
||||
this->addr.calcVector(w, m->addr);
|
||||
|
||||
if (num_src_operands == 1) {
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
((CType*)m->d_data)[lane] =
|
||||
this->src.template get<CType>(w, lane);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int k= 0; k < num_src_operands; ++k) {
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
|
||||
this->src_vect[k].template get<CType>(w, lane);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m->m_type = OperationType::memType;
|
||||
m->v_type = OperationType::vgprType;
|
||||
|
||||
m->statusBitVector = 0;
|
||||
m->equiv = this->equivClass;
|
||||
|
||||
if (num_src_operands == 1) {
|
||||
m->n_reg = 1;
|
||||
} else {
|
||||
m->n_reg = num_src_operands;
|
||||
}
|
||||
|
||||
m->simdId = w->simdId;
|
||||
m->wfSlotId = w->wfSlotId;
|
||||
m->wfDynId = w->wfDynId;
|
||||
m->kern_id = w->kernId;
|
||||
m->cu_id = w->computeUnit->cu_id;
|
||||
m->latency.init(&w->computeUnit->shader->tick_cnt);
|
||||
|
||||
switch (this->segment) {
|
||||
case Brig::BRIG_SEGMENT_GLOBAL:
|
||||
m->pipeId = GLBMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(1));
|
||||
|
||||
// this is a complete hack to get around a compiler bug
|
||||
// (the compiler currently generates global access for private
|
||||
// addresses (starting from 0). We need to add the private offset)
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
if (m->addr[lane] < w->privSizePerItem) {
|
||||
|
||||
// calcPrivAddr will fail if accesses are unaligned
|
||||
assert(!((sizeof(CType)-1) & m->addr[lane]));
|
||||
|
||||
Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
|
||||
this);
|
||||
|
||||
m->addr[lane] = privAddr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
w->computeUnit->globalMemoryPipe.issueRequest(m);
|
||||
w->outstandingReqsWrGm++;
|
||||
w->wrGmReqsInPipe--;
|
||||
break;
|
||||
|
||||
case Brig::BRIG_SEGMENT_SPILL:
|
||||
assert(num_src_operands == 1);
|
||||
m->pipeId = GLBMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(1));
|
||||
{
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
assert(m->addr[lane] < w->spillSizePerItem);
|
||||
|
||||
m->addr[lane] = m->addr[lane] * w->spillWidth +
|
||||
lane * sizeof(CType) + w->spillBase;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
w->computeUnit->globalMemoryPipe.issueRequest(m);
|
||||
w->outstandingReqsWrGm++;
|
||||
w->wrGmReqsInPipe--;
|
||||
break;
|
||||
|
||||
case Brig::BRIG_SEGMENT_GROUP:
|
||||
m->pipeId = LDSMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(24));
|
||||
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
|
||||
w->outstandingReqsWrLm++;
|
||||
w->wrLmReqsInPipe--;
|
||||
break;
|
||||
|
||||
case Brig::BRIG_SEGMENT_PRIVATE:
|
||||
m->pipeId = GLBMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(1));
|
||||
{
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
assert(m->addr[lane] < w->privSizePerItem);
|
||||
m->addr[lane] = m->addr[lane] + lane *
|
||||
sizeof(CType)+w->privBase;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
w->computeUnit->globalMemoryPipe.issueRequest(m);
|
||||
w->outstandingReqsWrGm++;
|
||||
w->wrGmReqsInPipe--;
|
||||
break;
|
||||
|
||||
default:
|
||||
fatal("Store to unsupported segment %d\n", this->segment);
|
||||
}
|
||||
|
||||
w->outstandingReqs++;
|
||||
w->memReqsInPipe--;
|
||||
}
|
||||
|
||||
template<typename OperationType, typename SrcDataType,
|
||||
typename AddrRegOperandType>
|
||||
void
|
||||
StInst<OperationType, SrcDataType,
|
||||
AddrRegOperandType>::generateDisassembly()
|
||||
{
|
||||
switch (num_src_operands) {
|
||||
case 1:
|
||||
this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
|
||||
segmentNames[this->segment],
|
||||
OperationType::label,
|
||||
this->src.disassemble(),
|
||||
this->addr.disassemble());
|
||||
break;
|
||||
case 2:
|
||||
this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
|
||||
segmentNames[this->segment],
|
||||
OperationType::label,
|
||||
this->src_vect[0].disassemble(),
|
||||
this->src_vect[1].disassemble(),
|
||||
this->addr.disassemble());
|
||||
break;
|
||||
case 4:
|
||||
this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
|
||||
this->opcode,
|
||||
segmentNames[this->segment],
|
||||
OperationType::label,
|
||||
this->src_vect[0].disassemble(),
|
||||
this->src_vect[1].disassemble(),
|
||||
this->src_vect[2].disassemble(),
|
||||
this->src_vect[3].disassemble(),
|
||||
this->addr.disassemble());
|
||||
break;
|
||||
default: fatal("Bad ld register src operand, num vector operands: "
|
||||
"%d \n", num_src_operands);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
|
||||
bool HasDst>
|
||||
void
|
||||
AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
|
||||
HasDst>::execute(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
typedef typename DataType::CType CType;
|
||||
|
||||
Wavefront *w = gpuDynInst->wavefront();
|
||||
|
||||
GPUDynInstPtr m = gpuDynInst;
|
||||
|
||||
this->addr.calcVector(w, m->addr);
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
((CType *)m->a_data)[lane] =
|
||||
this->src[0].template get<CType>(w, lane);
|
||||
}
|
||||
|
||||
// load second source operand for CAS
|
||||
if (NumSrcOperands > 1) {
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
((CType*)m->x_data)[lane] =
|
||||
this->src[1].template get<CType>(w, lane);
|
||||
}
|
||||
}
|
||||
|
||||
assert(NumSrcOperands <= 2);
|
||||
|
||||
m->m_type = DataType::memType;
|
||||
m->v_type = DataType::vgprType;
|
||||
|
||||
m->exec_mask = w->execMask();
|
||||
m->statusBitVector = 0;
|
||||
m->equiv = 0; // atomics don't have an equivalence class operand
|
||||
m->n_reg = 1;
|
||||
|
||||
if (HasDst) {
|
||||
m->dst_reg = this->dest.regIndex();
|
||||
}
|
||||
|
||||
m->simdId = w->simdId;
|
||||
m->wfSlotId = w->wfSlotId;
|
||||
m->wfDynId = w->wfDynId;
|
||||
m->kern_id = w->kernId;
|
||||
m->cu_id = w->computeUnit->cu_id;
|
||||
m->latency.init(&w->computeUnit->shader->tick_cnt);
|
||||
|
||||
switch (this->segment) {
|
||||
case Brig::BRIG_SEGMENT_GLOBAL:
|
||||
m->latency.set(w->computeUnit->shader->ticks(64));
|
||||
m->pipeId = GLBMEM_PIPE;
|
||||
|
||||
w->computeUnit->globalMemoryPipe.issueRequest(m);
|
||||
w->outstandingReqsWrGm++;
|
||||
w->wrGmReqsInPipe--;
|
||||
w->outstandingReqsRdGm++;
|
||||
w->rdGmReqsInPipe--;
|
||||
break;
|
||||
|
||||
case Brig::BRIG_SEGMENT_GROUP:
|
||||
m->pipeId = LDSMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(24));
|
||||
w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
|
||||
w->outstandingReqsWrLm++;
|
||||
w->wrLmReqsInPipe--;
|
||||
w->outstandingReqsRdLm++;
|
||||
w->rdLmReqsInPipe--;
|
||||
break;
|
||||
|
||||
default:
|
||||
fatal("Atomic op to unsupported segment %d\n",
|
||||
this->segment);
|
||||
}
|
||||
|
||||
w->outstandingReqs++;
|
||||
w->memReqsInPipe--;
|
||||
}
|
||||
|
||||
const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);
|
||||
|
||||
template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
|
||||
bool HasDst>
|
||||
void
|
||||
AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
|
||||
HasDst>::generateDisassembly()
|
||||
{
|
||||
if (HasDst) {
|
||||
this->disassembly =
|
||||
csprintf("%s_%s_%s_%s %s,%s", this->opcode,
|
||||
atomicOpToString(this->atomicOperation),
|
||||
segmentNames[this->segment],
|
||||
DataType::label, this->dest.disassemble(),
|
||||
this->addr.disassemble());
|
||||
} else {
|
||||
this->disassembly =
|
||||
csprintf("%s_%s_%s_%s %s", this->opcode,
|
||||
atomicOpToString(this->atomicOperation),
|
||||
segmentNames[this->segment],
|
||||
DataType::label, this->addr.disassemble());
|
||||
}
|
||||
|
||||
for (int i = 0; i < NumSrcOperands; ++i) {
|
||||
this->disassembly += ",";
|
||||
this->disassembly += this->src[i].disassemble();
|
||||
}
|
||||
}
|
||||
} // namespace HsailISA
|
||||
@@ -1,791 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Marc Orr
|
||||
*/
|
||||
|
||||
#include <csignal>
|
||||
|
||||
#include "arch/hsail/insts/decl.hh"
|
||||
#include "arch/hsail/insts/mem.hh"
|
||||
|
||||
namespace HsailISA
|
||||
{
|
||||
// Pseudo (or magic) instructions are overloaded on the hsail call
|
||||
// instruction, because of its flexible parameter signature.
|
||||
|
||||
// To add a new magic instruction:
|
||||
// 1. Add an entry to the enum.
|
||||
// 2. Implement it in the switch statement below (Call::exec).
|
||||
// 3. Add a utility function to hsa/hsail-gpu-compute/util/magicinst.h,
|
||||
// so it's easy to call from an OpenCL kernel.
|
||||
|
||||
// This enum should be identical to the enum in
|
||||
// hsa/hsail-gpu-compute/util/magicinst.h
|
||||
// Opcodes of the pseudo ("magic") instructions dispatched by
// Call::execPseudoInst. The numeric values are written out explicitly so
// each opcode can be read off directly; they are the same values the
// implicit numbering produced.
enum
{
    MAGIC_PRINT_WF_32 = 0,
    MAGIC_PRINT_WF_64 = 1,
    MAGIC_PRINT_LANE = 2,
    MAGIC_PRINT_LANE_64 = 3,
    MAGIC_PRINT_WF_FLOAT = 4,
    MAGIC_SIM_BREAK = 5,
    MAGIC_PREF_SUM = 6,
    MAGIC_REDUCTION = 7,
    MAGIC_MASKLANE_LOWER = 8,
    MAGIC_MASKLANE_UPPER = 9,
    MAGIC_JOIN_WF_BAR = 10,
    MAGIC_WAIT_WF_BAR = 11,
    MAGIC_PANIC = 12,
    MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG = 13,
    MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG = 14,
    MAGIC_LOAD_GLOBAL_U32_REG = 15,
    MAGIC_XACT_CAS_LD = 16,
    MAGIC_MOST_SIG_THD = 17,
    MAGIC_MOST_SIG_BROADCAST = 18,
    MAGIC_PRINT_WFID_32 = 19,
    MAGIC_PRINT_WFID_64 = 20
};
|
||||
|
||||
void
|
||||
Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
const VectorMask &mask = w->getPred();
|
||||
|
||||
int op = 0;
|
||||
bool got_op = false;
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
int src_val0 = src1.get<int>(w, lane, 0);
|
||||
if (got_op) {
|
||||
if (src_val0 != op) {
|
||||
fatal("Multiple magic instructions per PC not "
|
||||
"supported\n");
|
||||
}
|
||||
} else {
|
||||
op = src_val0;
|
||||
got_op = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch(op) {
|
||||
case MAGIC_PRINT_WF_32:
|
||||
MagicPrintWF32(w);
|
||||
break;
|
||||
case MAGIC_PRINT_WF_64:
|
||||
MagicPrintWF64(w);
|
||||
break;
|
||||
case MAGIC_PRINT_LANE:
|
||||
MagicPrintLane(w);
|
||||
break;
|
||||
case MAGIC_PRINT_LANE_64:
|
||||
MagicPrintLane64(w);
|
||||
break;
|
||||
case MAGIC_PRINT_WF_FLOAT:
|
||||
MagicPrintWFFloat(w);
|
||||
break;
|
||||
case MAGIC_SIM_BREAK:
|
||||
MagicSimBreak(w);
|
||||
break;
|
||||
case MAGIC_PREF_SUM:
|
||||
MagicPrefixSum(w);
|
||||
break;
|
||||
case MAGIC_REDUCTION:
|
||||
MagicReduction(w);
|
||||
break;
|
||||
case MAGIC_MASKLANE_LOWER:
|
||||
MagicMaskLower(w);
|
||||
break;
|
||||
case MAGIC_MASKLANE_UPPER:
|
||||
MagicMaskUpper(w);
|
||||
break;
|
||||
case MAGIC_JOIN_WF_BAR:
|
||||
MagicJoinWFBar(w);
|
||||
break;
|
||||
case MAGIC_WAIT_WF_BAR:
|
||||
MagicWaitWFBar(w);
|
||||
break;
|
||||
case MAGIC_PANIC:
|
||||
MagicPanic(w);
|
||||
break;
|
||||
|
||||
// atomic instructions
|
||||
case MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG:
|
||||
MagicAtomicNRAddGlobalU32Reg(w, gpuDynInst);
|
||||
break;
|
||||
|
||||
case MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG:
|
||||
MagicAtomicNRAddGroupU32Reg(w, gpuDynInst);
|
||||
break;
|
||||
|
||||
case MAGIC_LOAD_GLOBAL_U32_REG:
|
||||
MagicLoadGlobalU32Reg(w, gpuDynInst);
|
||||
break;
|
||||
|
||||
case MAGIC_XACT_CAS_LD:
|
||||
MagicXactCasLd(w);
|
||||
break;
|
||||
|
||||
case MAGIC_MOST_SIG_THD:
|
||||
MagicMostSigThread(w);
|
||||
break;
|
||||
|
||||
case MAGIC_MOST_SIG_BROADCAST:
|
||||
MagicMostSigBroadcast(w);
|
||||
break;
|
||||
|
||||
case MAGIC_PRINT_WFID_32:
|
||||
MagicPrintWF32ID(w);
|
||||
break;
|
||||
|
||||
case MAGIC_PRINT_WFID_64:
|
||||
MagicPrintWFID64(w);
|
||||
break;
|
||||
|
||||
default: fatal("unrecognized magic instruction: %d\n", op);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicPrintLane(Wavefront *w)
|
||||
{
|
||||
#if TRACING_ON
|
||||
const VectorMask &mask = w->getPred();
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
int src_val1 = src1.get<int>(w, lane, 1);
|
||||
int src_val2 = src1.get<int>(w, lane, 2);
|
||||
if (src_val2) {
|
||||
DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
|
||||
disassemble(), w->computeUnit->cu_id, w->simdId,
|
||||
w->wfSlotId, lane, src_val1);
|
||||
} else {
|
||||
DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
|
||||
disassemble(), w->computeUnit->cu_id, w->simdId,
|
||||
w->wfSlotId, lane, src_val1);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicPrintLane64(Wavefront *w)
|
||||
{
|
||||
#if TRACING_ON
|
||||
const VectorMask &mask = w->getPred();
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
|
||||
int src_val2 = src1.get<int>(w, lane, 2);
|
||||
if (src_val2) {
|
||||
DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
|
||||
disassemble(), w->computeUnit->cu_id, w->simdId,
|
||||
w->wfSlotId, lane, src_val1);
|
||||
} else {
|
||||
DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
|
||||
disassemble(), w->computeUnit->cu_id, w->simdId,
|
||||
w->wfSlotId, lane, src_val1);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicPrintWF32(Wavefront *w)
|
||||
{
|
||||
#if TRACING_ON
|
||||
const VectorMask &mask = w->getPred();
|
||||
std::string res_str;
|
||||
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (!(lane & 7)) {
|
||||
res_str += csprintf("DB%03d: ", (int)w->wfDynId);
|
||||
}
|
||||
|
||||
if (mask[lane]) {
|
||||
int src_val1 = src1.get<int>(w, lane, 1);
|
||||
int src_val2 = src1.get<int>(w, lane, 2);
|
||||
|
||||
if (src_val2) {
|
||||
res_str += csprintf("%08x", src_val1);
|
||||
} else {
|
||||
res_str += csprintf("%08d", src_val1);
|
||||
}
|
||||
} else {
|
||||
res_str += csprintf("xxxxxxxx");
|
||||
}
|
||||
|
||||
if ((lane & 7) == 7) {
|
||||
res_str += csprintf("\n");
|
||||
} else {
|
||||
res_str += csprintf(" ");
|
||||
}
|
||||
}
|
||||
|
||||
res_str += "\n\n";
|
||||
DPRINTFN(res_str.c_str());
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicPrintWF32ID(Wavefront *w)
|
||||
{
|
||||
#if TRACING_ON
|
||||
const VectorMask &mask = w->getPred();
|
||||
std::string res_str;
|
||||
int src_val3 = -1;
|
||||
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (!(lane & 7)) {
|
||||
res_str += csprintf("DB%03d: ", (int)w->wfDynId);
|
||||
}
|
||||
|
||||
if (mask[lane]) {
|
||||
int src_val1 = src1.get<int>(w, lane, 1);
|
||||
int src_val2 = src1.get<int>(w, lane, 2);
|
||||
src_val3 = src1.get<int>(w, lane, 3);
|
||||
|
||||
if (src_val2) {
|
||||
res_str += csprintf("%08x", src_val1);
|
||||
} else {
|
||||
res_str += csprintf("%08d", src_val1);
|
||||
}
|
||||
} else {
|
||||
res_str += csprintf("xxxxxxxx");
|
||||
}
|
||||
|
||||
if ((lane & 7) == 7) {
|
||||
res_str += csprintf("\n");
|
||||
} else {
|
||||
res_str += csprintf(" ");
|
||||
}
|
||||
}
|
||||
|
||||
res_str += "\n\n";
|
||||
if (w->wfDynId == src_val3) {
|
||||
DPRINTFN(res_str.c_str());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicPrintWF64(Wavefront *w)
|
||||
{
|
||||
#if TRACING_ON
|
||||
const VectorMask &mask = w->getPred();
|
||||
std::string res_str;
|
||||
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (!(lane & 3)) {
|
||||
res_str += csprintf("DB%03d: ", (int)w->wfDynId);
|
||||
}
|
||||
|
||||
if (mask[lane]) {
|
||||
int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
|
||||
int src_val2 = src1.get<int>(w, lane, 2);
|
||||
|
||||
if (src_val2) {
|
||||
res_str += csprintf("%016x", src_val1);
|
||||
} else {
|
||||
res_str += csprintf("%016d", src_val1);
|
||||
}
|
||||
} else {
|
||||
res_str += csprintf("xxxxxxxxxxxxxxxx");
|
||||
}
|
||||
|
||||
if ((lane & 3) == 3) {
|
||||
res_str += csprintf("\n");
|
||||
} else {
|
||||
res_str += csprintf(" ");
|
||||
}
|
||||
}
|
||||
|
||||
res_str += "\n\n";
|
||||
DPRINTFN(res_str.c_str());
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicPrintWFID64(Wavefront *w)
|
||||
{
|
||||
#if TRACING_ON
|
||||
const VectorMask &mask = w->getPred();
|
||||
std::string res_str;
|
||||
int src_val3 = -1;
|
||||
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (!(lane & 3)) {
|
||||
res_str += csprintf("DB%03d: ", (int)w->wfDynId);
|
||||
}
|
||||
|
||||
if (mask[lane]) {
|
||||
int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
|
||||
int src_val2 = src1.get<int>(w, lane, 2);
|
||||
src_val3 = src1.get<int>(w, lane, 3);
|
||||
|
||||
if (src_val2) {
|
||||
res_str += csprintf("%016x", src_val1);
|
||||
} else {
|
||||
res_str += csprintf("%016d", src_val1);
|
||||
}
|
||||
} else {
|
||||
res_str += csprintf("xxxxxxxxxxxxxxxx");
|
||||
}
|
||||
|
||||
if ((lane & 3) == 3) {
|
||||
res_str += csprintf("\n");
|
||||
} else {
|
||||
res_str += csprintf(" ");
|
||||
}
|
||||
}
|
||||
|
||||
res_str += "\n\n";
|
||||
if (w->wfDynId == src_val3) {
|
||||
DPRINTFN(res_str.c_str());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicPrintWFFloat(Wavefront *w)
|
||||
{
|
||||
#if TRACING_ON
|
||||
const VectorMask &mask = w->getPred();
|
||||
std::string res_str;
|
||||
res_str = csprintf("krl_prt (%s)\n", disassemble());
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (!(lane & 7)) {
|
||||
res_str += csprintf("DB%03d: ", (int)w->wfDynId);
|
||||
}
|
||||
|
||||
if (mask[lane]) {
|
||||
float src_val1 = src1.get<float>(w, lane, 1);
|
||||
res_str += csprintf("%08f", src_val1);
|
||||
} else {
|
||||
res_str += csprintf("xxxxxxxx");
|
||||
}
|
||||
|
||||
if ((lane & 7) == 7) {
|
||||
res_str += csprintf("\n");
|
||||
} else {
|
||||
res_str += csprintf(" ");
|
||||
}
|
||||
}
|
||||
|
||||
res_str += "\n\n";
|
||||
DPRINTFN(res_str.c_str());
|
||||
#endif
|
||||
}
|
||||
|
||||
// raises a signal that GDB will catch
|
||||
// when done with the break, type "signal 0" in gdb to continue
|
||||
void
|
||||
Call::MagicSimBreak(Wavefront *w)
|
||||
{
|
||||
std::string res_str;
|
||||
// print out state for this wavefront and then break
|
||||
res_str = csprintf("Breakpoint encountered for wavefront %i\n",
|
||||
w->wfSlotId);
|
||||
|
||||
res_str += csprintf(" Kern ID: %i\n", w->kernId);
|
||||
res_str += csprintf(" Phase ID: %i\n", w->simdId);
|
||||
res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id);
|
||||
res_str += csprintf(" Exec mask: ");
|
||||
|
||||
for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) {
|
||||
if (w->execMask(i))
|
||||
res_str += "1";
|
||||
else
|
||||
res_str += "0";
|
||||
|
||||
if ((i & 7) == 7)
|
||||
res_str += " ";
|
||||
}
|
||||
|
||||
res_str += csprintf("(0x%016llx)\n", w->execMask().to_ullong());
|
||||
|
||||
res_str += "\nHelpful debugging hints:\n";
|
||||
res_str += " Check out w->s_reg / w->d_reg for register state\n";
|
||||
|
||||
res_str += "\n\n";
|
||||
DPRINTFN(res_str.c_str());
|
||||
fflush(stdout);
|
||||
|
||||
raise(SIGTRAP);
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicPrefixSum(Wavefront *w)
|
||||
{
|
||||
const VectorMask &mask = w->getPred();
|
||||
int res = 0;
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
int src_val1 = src1.get<int>(w, lane, 1);
|
||||
dest.set<int>(w, lane, res);
|
||||
res += src_val1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicReduction(Wavefront *w)
|
||||
{
|
||||
// reduction magic instruction
|
||||
// The reduction instruction takes up to 64 inputs (one from
|
||||
// each thread in a WF) and sums them. It returns the sum to
|
||||
// each thread in the WF.
|
||||
const VectorMask &mask = w->getPred();
|
||||
int res = 0;
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
int src_val1 = src1.get<int>(w, lane, 1);
|
||||
res += src_val1;
|
||||
}
|
||||
}
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
dest.set<int>(w, lane, res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicMaskLower(Wavefront *w)
|
||||
{
|
||||
const VectorMask &mask = w->getPred();
|
||||
int res = 0;
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
int src_val1 = src1.get<int>(w, lane, 1);
|
||||
|
||||
if (src_val1) {
|
||||
if (lane < (w->computeUnit->wfSize()/2)) {
|
||||
res = res | ((uint32_t)(1) << lane);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
dest.set<int>(w, lane, res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicMaskUpper(Wavefront *w)
|
||||
{
|
||||
const VectorMask &mask = w->getPred();
|
||||
int res = 0;
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
int src_val1 = src1.get<int>(w, lane, 1);
|
||||
|
||||
if (src_val1) {
|
||||
if (lane >= (w->computeUnit->wfSize()/2)) {
|
||||
res = res | ((uint32_t)(1) <<
|
||||
(lane - (w->computeUnit->wfSize()/2)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
dest.set<int>(w, lane, res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicJoinWFBar(Wavefront *w)
|
||||
{
|
||||
const VectorMask &mask = w->getPred();
|
||||
int max_cnt = 0;
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
w->barCnt[lane]++;
|
||||
|
||||
if (w->barCnt[lane] > max_cnt) {
|
||||
max_cnt = w->barCnt[lane];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (max_cnt > w->maxBarCnt) {
|
||||
w->maxBarCnt = max_cnt;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicWaitWFBar(Wavefront *w)
|
||||
{
|
||||
const VectorMask &mask = w->getPred();
|
||||
int max_cnt = 0;
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
w->barCnt[lane]--;
|
||||
}
|
||||
|
||||
if (w->barCnt[lane] > max_cnt) {
|
||||
max_cnt = w->barCnt[lane];
|
||||
}
|
||||
}
|
||||
|
||||
if (max_cnt < w->maxBarCnt) {
|
||||
w->maxBarCnt = max_cnt;
|
||||
}
|
||||
|
||||
w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
|
||||
w->instructionBuffer.end());
|
||||
if (w->pendingFetch)
|
||||
w->dropFetch = true;
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicPanic(Wavefront *w)
|
||||
{
|
||||
const VectorMask &mask = w->getPred();
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
int src_val1 = src1.get<int>(w, lane, 1);
|
||||
panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
|
||||
src_val1, lane);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Call::calcAddr(Wavefront *w, GPUDynInstPtr m)
|
||||
{
|
||||
// the address is in src1 | src2
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
int src_val1 = src1.get<int>(w, lane, 1);
|
||||
int src_val2 = src1.get<int>(w, lane, 2);
|
||||
Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2);
|
||||
|
||||
m->addr[lane] = addr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicAtomicNRAddGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
GPUDynInstPtr m = gpuDynInst;
|
||||
|
||||
calcAddr(w, m);
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
|
||||
}
|
||||
|
||||
setFlag(AtomicNoReturn);
|
||||
setFlag(AtomicAdd);
|
||||
setFlag(NoScope);
|
||||
setFlag(NoOrder);
|
||||
setFlag(GlobalSegment);
|
||||
|
||||
m->m_type = U32::memType;
|
||||
m->v_type = U32::vgprType;
|
||||
|
||||
m->exec_mask = w->execMask();
|
||||
m->statusBitVector = 0;
|
||||
m->equiv = 0; // atomics don't have an equivalence class operand
|
||||
m->n_reg = 1;
|
||||
|
||||
m->simdId = w->simdId;
|
||||
m->wfSlotId = w->wfSlotId;
|
||||
m->wfDynId = w->wfDynId;
|
||||
m->latency.init(&w->computeUnit->shader->tick_cnt);
|
||||
|
||||
m->pipeId = GLBMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(64));
|
||||
w->computeUnit->globalMemoryPipe.issueRequest(m);
|
||||
w->outstandingReqsWrGm++;
|
||||
w->wrGmReqsInPipe--;
|
||||
w->outstandingReqsRdGm++;
|
||||
w->rdGmReqsInPipe--;
|
||||
w->outstandingReqs++;
|
||||
w->memReqsInPipe--;
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
GPUDynInstPtr m = gpuDynInst;
|
||||
calcAddr(w, m);
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
|
||||
}
|
||||
|
||||
setFlag(AtomicNoReturn);
|
||||
setFlag(AtomicAdd);
|
||||
setFlag(NoScope);
|
||||
setFlag(NoOrder);
|
||||
setFlag(GlobalSegment);
|
||||
|
||||
m->m_type = U32::memType;
|
||||
m->v_type = U32::vgprType;
|
||||
|
||||
m->exec_mask = w->execMask();
|
||||
m->statusBitVector = 0;
|
||||
m->equiv = 0; // atomics don't have an equivalence class operand
|
||||
m->n_reg = 1;
|
||||
|
||||
m->simdId = w->simdId;
|
||||
m->wfSlotId = w->wfSlotId;
|
||||
m->wfDynId = w->wfDynId;
|
||||
m->latency.init(&w->computeUnit->shader->tick_cnt);
|
||||
|
||||
m->pipeId = GLBMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(64));
|
||||
w->computeUnit->globalMemoryPipe.issueRequest(m);
|
||||
w->outstandingReqsWrGm++;
|
||||
w->wrGmReqsInPipe--;
|
||||
w->outstandingReqsRdGm++;
|
||||
w->rdGmReqsInPipe--;
|
||||
w->outstandingReqs++;
|
||||
w->memReqsInPipe--;
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
GPUDynInstPtr m = gpuDynInst;
|
||||
// calculate the address
|
||||
calcAddr(w, m);
|
||||
|
||||
setFlag(Load);
|
||||
setFlag(NoScope);
|
||||
setFlag(NoOrder);
|
||||
setFlag(GlobalSegment);
|
||||
|
||||
m->m_type = U32::memType; //MemDataType::memType;
|
||||
m->v_type = U32::vgprType; //DestDataType::vgprType;
|
||||
|
||||
m->exec_mask = w->execMask();
|
||||
m->statusBitVector = 0;
|
||||
m->equiv = 0;
|
||||
m->n_reg = 1;
|
||||
|
||||
// FIXME
|
||||
//m->dst_reg = this->dest.regIndex();
|
||||
|
||||
m->simdId = w->simdId;
|
||||
m->wfSlotId = w->wfSlotId;
|
||||
m->wfDynId = w->wfDynId;
|
||||
m->latency.init(&w->computeUnit->shader->tick_cnt);
|
||||
|
||||
m->pipeId = GLBMEM_PIPE;
|
||||
m->latency.set(w->computeUnit->shader->ticks(1));
|
||||
w->computeUnit->globalMemoryPipe.issueRequest(m);
|
||||
w->outstandingReqsRdGm++;
|
||||
w->rdGmReqsInPipe--;
|
||||
w->outstandingReqs++;
|
||||
w->memReqsInPipe--;
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicXactCasLd(Wavefront *w)
|
||||
{
|
||||
const VectorMask &mask = w->getPred();
|
||||
int src_val1 = 0;
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (mask[lane]) {
|
||||
src_val1 = src1.get<int>(w, lane, 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!w->computeUnit->xactCasLoadMap.count(src_val1)) {
|
||||
w->computeUnit->xactCasLoadMap[src_val1] = ComputeUnit::waveQueue();
|
||||
w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue.clear();
|
||||
}
|
||||
|
||||
w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue
|
||||
.push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId));
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicMostSigThread(Wavefront *w)
|
||||
{
|
||||
const VectorMask &mask = w->getPred();
|
||||
unsigned mst = true;
|
||||
|
||||
for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
|
||||
if (mask[lane]) {
|
||||
dest.set<int>(w, lane, mst);
|
||||
mst = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Call::MagicMostSigBroadcast(Wavefront *w)
|
||||
{
|
||||
const VectorMask &mask = w->getPred();
|
||||
int res = 0;
|
||||
bool got_res = false;
|
||||
|
||||
for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
|
||||
if (mask[lane]) {
|
||||
if (!got_res) {
|
||||
res = src1.get<int>(w, lane, 1);
|
||||
got_res = true;
|
||||
}
|
||||
dest.set<int>(w, lane, res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace HsailISA
|
||||
@@ -1,468 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt
|
||||
*/
|
||||
|
||||
#include "arch/hsail/operand.hh"
|
||||
|
||||
using namespace Brig;
|
||||
|
||||
bool
|
||||
BaseRegOperand::init(unsigned opOffset, const BrigObject *obj,
|
||||
unsigned &maxRegIdx, char _regFileChar)
|
||||
{
|
||||
regFileChar = _regFileChar;
|
||||
const BrigOperand *brigOp = obj->getOperand(opOffset);
|
||||
|
||||
if (brigOp->kind != BRIG_KIND_OPERAND_REGISTER)
|
||||
return false;
|
||||
|
||||
const BrigOperandRegister *brigRegOp = (const BrigOperandRegister*)brigOp;
|
||||
|
||||
regIdx = brigRegOp->regNum;
|
||||
|
||||
DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d\n", regIdx,
|
||||
brigRegOp->regKind);
|
||||
|
||||
maxRegIdx = std::max(maxRegIdx, regIdx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
ListOperand::init(unsigned opOffset, const BrigObject *obj)
|
||||
{
|
||||
const BrigOperand *brigOp = (const BrigOperand*)obj->getOperand(opOffset);
|
||||
|
||||
switch (brigOp->kind) {
|
||||
case BRIG_KIND_OPERAND_CODE_LIST:
|
||||
{
|
||||
const BrigOperandCodeList *opList =
|
||||
(const BrigOperandCodeList*)brigOp;
|
||||
|
||||
const Brig::BrigData *oprnd_data =
|
||||
obj->getBrigBaseData(opList->elements);
|
||||
|
||||
// Note: for calls Dest list of operands could be size of 0.
|
||||
elementCount = oprnd_data->byteCount / 4;
|
||||
|
||||
DPRINTF(GPUReg, "Operand Code List: # elements: %d\n",
|
||||
elementCount);
|
||||
|
||||
for (int i = 0; i < elementCount; ++i) {
|
||||
unsigned *data_offset =
|
||||
(unsigned*)obj->getData(opList->elements + 4 * (i + 1));
|
||||
|
||||
const BrigDirectiveVariable *p =
|
||||
(const BrigDirectiveVariable*)obj->
|
||||
getCodeSectionEntry(*data_offset);
|
||||
|
||||
StorageElement *se = obj->currentCode->storageMap->
|
||||
findSymbol(BRIG_SEGMENT_ARG, p);
|
||||
|
||||
assert(se);
|
||||
callArgs.push_back(se);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
fatal("ListOperand: bad operand kind %d\n", brigOp->kind);
|
||||
}
|
||||
}
|
||||
|
||||
std::string
|
||||
ListOperand::disassemble()
|
||||
{
|
||||
std::string res_str("");
|
||||
|
||||
for (auto it : callArgs) {
|
||||
res_str += csprintf("%s ", it->name.c_str());
|
||||
}
|
||||
|
||||
return res_str;
|
||||
}
|
||||
|
||||
void
|
||||
FunctionRefOperand::init(unsigned opOffset, const BrigObject *obj)
|
||||
{
|
||||
const BrigOperand *baseOp = obj->getOperand(opOffset);
|
||||
|
||||
if (baseOp->kind != BRIG_KIND_OPERAND_CODE_REF) {
|
||||
fatal("FunctionRefOperand: bad operand kind %d\n", baseOp->kind);
|
||||
}
|
||||
|
||||
const BrigOperandCodeRef *brigOp = (const BrigOperandCodeRef*)baseOp;
|
||||
|
||||
const BrigDirectiveExecutable *p =
|
||||
(const BrigDirectiveExecutable*)obj->getCodeSectionEntry(brigOp->ref);
|
||||
|
||||
func_name = obj->getString(p->name);
|
||||
}
|
||||
|
||||
std::string
|
||||
FunctionRefOperand::disassemble()
|
||||
{
|
||||
DPRINTF(GPUReg, "Operand Func-ref name: %s\n", func_name);
|
||||
|
||||
return csprintf("%s", func_name);
|
||||
}
|
||||
|
||||
bool
|
||||
BaseRegOperand::init_from_vect(unsigned opOffset, const BrigObject *obj,
|
||||
int at, unsigned &maxRegIdx, char _regFileChar)
|
||||
{
|
||||
regFileChar = _regFileChar;
|
||||
const BrigOperand *brigOp = obj->getOperand(opOffset);
|
||||
|
||||
if (brigOp->kind != BRIG_KIND_OPERAND_OPERAND_LIST)
|
||||
return false;
|
||||
|
||||
|
||||
const Brig::BrigOperandOperandList *brigRegVecOp =
|
||||
(const Brig::BrigOperandOperandList*)brigOp;
|
||||
|
||||
unsigned *data_offset =
|
||||
(unsigned*)obj->getData(brigRegVecOp->elements + 4 * (at + 1));
|
||||
|
||||
const BrigOperand *p =
|
||||
(const BrigOperand*)obj->getOperand(*data_offset);
|
||||
if (p->kind != BRIG_KIND_OPERAND_REGISTER) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const BrigOperandRegister *brigRegOp =(const BrigOperandRegister*)p;
|
||||
|
||||
regIdx = brigRegOp->regNum;
|
||||
|
||||
DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d \n", regIdx,
|
||||
brigRegOp->regKind);
|
||||
|
||||
maxRegIdx = std::max(maxRegIdx, regIdx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
BaseRegOperand::initWithStrOffset(unsigned strOffset, const BrigObject *obj,
|
||||
unsigned &maxRegIdx, char _regFileChar)
|
||||
{
|
||||
const char *name = obj->getString(strOffset);
|
||||
char *endptr;
|
||||
regIdx = strtoul(name + 2, &endptr, 10);
|
||||
|
||||
if (name[0] != '$' || name[1] != _regFileChar) {
|
||||
fatal("register operand parse error on \"%s\"\n", name);
|
||||
}
|
||||
|
||||
maxRegIdx = std::max(maxRegIdx, regIdx);
|
||||
}
|
||||
|
||||
unsigned SRegOperand::maxRegIdx;
|
||||
unsigned DRegOperand::maxRegIdx;
|
||||
unsigned CRegOperand::maxRegIdx;
|
||||
|
||||
std::string
|
||||
SRegOperand::disassemble()
|
||||
{
|
||||
return csprintf("$s%d", regIdx);
|
||||
}
|
||||
|
||||
std::string
|
||||
DRegOperand::disassemble()
|
||||
{
|
||||
return csprintf("$d%d", regIdx);
|
||||
}
|
||||
|
||||
std::string
|
||||
CRegOperand::disassemble()
|
||||
{
|
||||
return csprintf("$c%d", regIdx);
|
||||
}
|
||||
|
||||
/**
 * Classify the operand at opOffset.
 *
 * - Register: reports the operand kind and the register's kind.
 * - Wavesize: reports the operand kind with a double register kind.
 * - Operand list: reports the operand kind with the register kind implied
 *   by the last list element (a register, 4/8-byte constant bytes, or
 *   wavesize); any other element kind or constant size is fatal.
 * - Address without a register: reports an address kind with the type of
 *   the referenced variable, or BRIG_TYPE_NONE when there is no symbol.
 * - Address with a register: reports a register kind taken from that
 *   register.
 *
 * Any other operand kind is a fatal error.
 *
 * @param opOffset offset of the operand
 * @param obj BRIG object the operand is read from
 * @return the operand classification
 */
BrigRegOperandInfo
findRegDataType(unsigned opOffset, const BrigObject *obj)
{
    const BrigOperand *baseOp = obj->getOperand(opOffset);

    switch (baseOp->kind) {
      case BRIG_KIND_OPERAND_REGISTER:
        {
            const BrigOperandRegister *op =
                (const BrigOperandRegister*)baseOp;

            return BrigRegOperandInfo((BrigKind16_t)baseOp->kind,
                                      (BrigRegisterKind)op->regKind);
        }

      case BRIG_KIND_OPERAND_WAVESIZE:
        return BrigRegOperandInfo((BrigKind16_t)baseOp->kind,
                                  BRIG_REGISTER_KIND_DOUBLE);

      case BRIG_KIND_OPERAND_OPERAND_LIST:
        {
            const BrigOperandOperandList *op =
                (const BrigOperandOperandList*)baseOp;
            const BrigData *data_p =
                (const BrigData*)obj->getData(op->elements);

            // The payload is an array of 4-byte operand offsets.
            const int *elem_offsets = (const int*)data_p->bytes;

            BrigRegisterKind reg_kind = (BrigRegisterKind)0;

            for (uint32_t offset = 0; offset < data_p->byteCount;
                 offset += 4) {
                const BrigOperand *op_p =
                    obj->getOperand(elem_offsets[offset / 4]);

                if (op_p->kind == BRIG_KIND_OPERAND_REGISTER) {
                    const BrigOperandRegister *brigRegOp =
                        (const BrigOperandRegister*)op_p;
                    reg_kind = (BrigRegisterKind)brigRegOp->regKind;
                } else if (op_p->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) {
                    uint16_t num_bytes =
                        ((const Brig::BrigOperandConstantBytes*)op_p)->
                        base.byteCount - sizeof(BrigBase);

                    if (num_bytes == sizeof(uint32_t)) {
                        reg_kind = BRIG_REGISTER_KIND_SINGLE;
                    } else if (num_bytes == sizeof(uint64_t)) {
                        reg_kind = BRIG_REGISTER_KIND_DOUBLE;
                    } else {
                        fatal("OperandList: bad operand size %d\n",
                              num_bytes);
                    }
                } else if (op_p->kind == BRIG_KIND_OPERAND_WAVESIZE) {
                    reg_kind = BRIG_REGISTER_KIND_DOUBLE;
                } else {
                    fatal("OperandList: bad operand kind %d\n", op_p->kind);
                }
            }

            return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, reg_kind);
        }

      case BRIG_KIND_OPERAND_ADDRESS:
        {
            const BrigOperandAddress *op = (const BrigOperandAddress*)baseOp;

            if (!op->reg) {
                BrigType type = BRIG_TYPE_NONE;

                if (op->symbol) {
                    const BrigDirective *dir = (const BrigDirective*)
                        obj->getCodeSectionEntry(op->symbol);

                    assert(dir->kind == BRIG_KIND_DIRECTIVE_VARIABLE);

                    const BrigDirectiveVariable *sym =
                        (const BrigDirectiveVariable*)dir;

                    type = (BrigType)sym->type;
                }

                return BrigRegOperandInfo(BRIG_KIND_OPERAND_ADDRESS, type);
            }

            const BrigOperandRegister *rop =
                (const BrigOperandRegister*)obj->getOperand(op->reg);

            return BrigRegOperandInfo(BRIG_KIND_OPERAND_REGISTER,
                                      (BrigRegisterKind)rop->regKind);
        }

      default:
        fatal("findRegDataType: bad operand kind %d\n", baseOp->kind);
        break;
    }
}
|
||||
|
||||
void
|
||||
AddrOperandBase::parseAddr(const BrigOperandAddress *op, const BrigObject *obj)
|
||||
{
|
||||
assert(op->base.kind == BRIG_KIND_OPERAND_ADDRESS);
|
||||
|
||||
const BrigDirective *d =
|
||||
(BrigDirective*)obj->getCodeSectionEntry(op->symbol);
|
||||
|
||||
/**
|
||||
* HSAIL does not properly handle immediate offsets for instruction types
|
||||
* that utilize them. It currently only supports instructions that use
|
||||
* variables instead. Again, these pop up in code that is never executed
|
||||
* (i.e. the HCC AMP codes) so we just hack it here to let us pass through
|
||||
* the HSAIL object initialization. If such code is ever called, we would
|
||||
* have to implement this properly.
|
||||
*/
|
||||
if (d->kind != BRIG_KIND_DIRECTIVE_VARIABLE) {
|
||||
warn("HSAIL implementation does not support instructions with "
|
||||
"address calculations where the operand is not a variable\n");
|
||||
}
|
||||
|
||||
const BrigDirectiveVariable *sym = (BrigDirectiveVariable*)d;
|
||||
name = obj->getString(sym->name);
|
||||
|
||||
if (sym->segment != BRIG_SEGMENT_ARG) {
|
||||
storageElement =
|
||||
obj->currentCode->storageMap->findSymbol(sym->segment, name);
|
||||
offset = 0;
|
||||
} else {
|
||||
// sym->name does not work for BRIG_SEGMENT_ARG for the following case:
|
||||
//
|
||||
// void foo(int a);
|
||||
// void bar(double a);
|
||||
//
|
||||
// foo(...) --> arg_u32 %param_p0;
|
||||
// st_arg_u32 $s0, [%param_p0];
|
||||
// call &foo (%param_p0);
|
||||
// bar(...) --> arg_f64 %param_p0;
|
||||
// st_arg_u64 $d0, [%param_p0];
|
||||
// call &foo (%param_p0);
|
||||
//
|
||||
// Both functions use the same variable name (param_p0)!!!
|
||||
//
|
||||
// Maybe this is a bug in the compiler (I don't know).
|
||||
//
|
||||
// Solution:
|
||||
// Use directive pointer (BrigDirectiveVariable) to differentiate 2
|
||||
// versions of param_p0.
|
||||
//
|
||||
// Note this solution is kind of stupid, because we are pulling stuff
|
||||
// out of the brig binary via the directive pointer and putting it into
|
||||
// the symbol table, but now we are indexing the symbol table by the
|
||||
// brig directive pointer! It makes the symbol table sort of pointless.
|
||||
// But I don't want to mess with the rest of the infrastructure, so
|
||||
// let's go with this for now.
|
||||
//
|
||||
// When we update the compiler again, we should see if this problem goes
|
||||
// away. If so, we can fold some of this functionality into the code for
|
||||
// kernel arguments. If not, maybe we can index the symbol name on a
|
||||
// hash of the variable AND function name
|
||||
storageElement = obj->currentCode->
|
||||
storageMap->findSymbol((Brig::BrigSegment)sym->segment, sym);
|
||||
|
||||
assert(storageElement);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t
|
||||
AddrOperandBase::calcUniformBase()
|
||||
{
|
||||
// start with offset, will be 0 if not specified
|
||||
uint64_t address = offset;
|
||||
|
||||
// add in symbol value if specified
|
||||
if (storageElement) {
|
||||
address += storageElement->offset;
|
||||
}
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
std::string
|
||||
AddrOperandBase::disassemble(std::string reg_disassembly)
|
||||
{
|
||||
std::string disasm;
|
||||
|
||||
if (offset || reg_disassembly != "") {
|
||||
disasm += "[";
|
||||
|
||||
if (reg_disassembly != "") {
|
||||
disasm += reg_disassembly;
|
||||
|
||||
if (offset > 0) {
|
||||
disasm += "+";
|
||||
}
|
||||
}
|
||||
|
||||
if (offset) {
|
||||
disasm += csprintf("%d", offset);
|
||||
}
|
||||
|
||||
disasm += "]";
|
||||
} else if (name) {
|
||||
disasm += csprintf("[%s]", name);
|
||||
}
|
||||
|
||||
return disasm;
|
||||
}
|
||||
|
||||
void
|
||||
NoRegAddrOperand::init(unsigned opOffset, const BrigObject *obj)
|
||||
{
|
||||
const BrigOperand *baseOp = obj->getOperand(opOffset);
|
||||
|
||||
if (baseOp->kind == BRIG_KIND_OPERAND_ADDRESS) {
|
||||
BrigOperandAddress *addrOp = (BrigOperandAddress*)baseOp;
|
||||
parseAddr(addrOp, obj);
|
||||
offset = (uint64_t(addrOp->offset.hi) << 32) |
|
||||
uint64_t(addrOp->offset.lo);
|
||||
} else {
|
||||
fatal("NoRegAddrOperand: bad operand kind %d\n", baseOp->kind);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::string
|
||||
NoRegAddrOperand::disassemble()
|
||||
{
|
||||
return AddrOperandBase::disassemble(std::string(""));
|
||||
}
|
||||
|
||||
void
|
||||
LabelOperand::init(unsigned opOffset, const BrigObject *obj)
|
||||
{
|
||||
const BrigOperandCodeRef *op =
|
||||
(const BrigOperandCodeRef*)obj->getOperand(opOffset);
|
||||
|
||||
assert(op->base.kind == BRIG_KIND_OPERAND_CODE_REF);
|
||||
|
||||
const BrigDirective *dir =
|
||||
(const BrigDirective*)obj->getCodeSectionEntry(op->ref);
|
||||
|
||||
assert(dir->kind == BRIG_KIND_DIRECTIVE_LABEL);
|
||||
label = obj->currentCode->refLabel((BrigDirectiveLabel*)dir, obj);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
LabelOperand::getTarget(Wavefront *w, int lane)
|
||||
{
|
||||
return label->get();
|
||||
}
|
||||
|
||||
std::string
|
||||
LabelOperand::disassemble()
|
||||
{
|
||||
return label->name;
|
||||
}
|
||||
@@ -1,796 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_HSAIL_OPERAND_HH__
|
||||
#define __ARCH_HSAIL_OPERAND_HH__
|
||||
|
||||
/**
|
||||
* @file operand.hh
|
||||
*
|
||||
* Defines classes encapsulating HSAIL instruction operands.
|
||||
*/
|
||||
|
||||
#include <cstring>
#include <limits>
#include <string>

#include "arch/hsail/Brig.h"
#include "base/trace.hh"
#include "base/types.hh"
#include "debug/GPUReg.hh"
#include "enums/RegisterType.hh"
#include "gpu-compute/brig_object.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/hsail_code.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/vector_register_file.hh"
#include "gpu-compute/wavefront.hh"
|
||||
|
||||
class Label;
|
||||
class StorageElement;
|
||||
|
||||
class BaseOperand
|
||||
{
|
||||
public:
|
||||
Enums::RegisterType registerType;
|
||||
uint32_t regOperandSize;
|
||||
BaseOperand() { registerType = Enums::RT_NONE; regOperandSize = 0; }
|
||||
bool isVectorRegister() { return registerType == Enums::RT_VECTOR; }
|
||||
bool isScalarRegister() { return registerType == Enums::RT_SCALAR; }
|
||||
bool isCondRegister() { return registerType == Enums::RT_CONDITION; }
|
||||
unsigned int regIndex() { return 0; }
|
||||
uint32_t opSize() { return regOperandSize; }
|
||||
virtual ~BaseOperand() { }
|
||||
};
|
||||
|
||||
class BrigRegOperandInfo
|
||||
{
|
||||
public:
|
||||
Brig::BrigKind16_t kind;
|
||||
Brig::BrigType type;
|
||||
Brig::BrigRegisterKind regKind;
|
||||
|
||||
BrigRegOperandInfo(Brig::BrigKind16_t _kind,
|
||||
Brig::BrigRegisterKind _regKind)
|
||||
: kind(_kind), regKind(_regKind)
|
||||
{
|
||||
}
|
||||
|
||||
BrigRegOperandInfo(Brig::BrigKind16_t _kind, Brig::BrigType _type)
|
||||
: kind(_kind), type(_type)
|
||||
{
|
||||
}
|
||||
|
||||
BrigRegOperandInfo() : kind(Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES),
|
||||
type(Brig::BRIG_TYPE_NONE)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
BrigRegOperandInfo findRegDataType(unsigned opOffset, const BrigObject *obj);
|
||||
|
||||
class BaseRegOperand : public BaseOperand
|
||||
{
|
||||
public:
|
||||
unsigned regIdx;
|
||||
char regFileChar;
|
||||
|
||||
bool init(unsigned opOffset, const BrigObject *obj,
|
||||
unsigned &maxRegIdx, char _regFileChar);
|
||||
|
||||
bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at,
|
||||
unsigned &maxRegIdx, char _regFileChar);
|
||||
|
||||
void initWithStrOffset(unsigned strOffset, const BrigObject *obj,
|
||||
unsigned &maxRegIdx, char _regFileChar);
|
||||
unsigned int regIndex() { return regIdx; }
|
||||
};
|
||||
|
||||
class SRegOperand : public BaseRegOperand
|
||||
{
|
||||
public:
|
||||
static unsigned maxRegIdx;
|
||||
|
||||
bool
|
||||
init(unsigned opOffset, const BrigObject *obj)
|
||||
{
|
||||
regOperandSize = sizeof(uint32_t);
|
||||
registerType = Enums::RT_VECTOR;
|
||||
|
||||
return BaseRegOperand::init(opOffset, obj, maxRegIdx, 's');
|
||||
}
|
||||
|
||||
bool
|
||||
init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
|
||||
{
|
||||
regOperandSize = sizeof(uint32_t);
|
||||
registerType = Enums::RT_VECTOR;
|
||||
|
||||
return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
|
||||
's');
|
||||
}
|
||||
|
||||
void
|
||||
initWithStrOffset(unsigned strOffset, const BrigObject *obj)
|
||||
{
|
||||
regOperandSize = sizeof(uint32_t);
|
||||
registerType = Enums::RT_VECTOR;
|
||||
|
||||
return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
|
||||
's');
|
||||
}
|
||||
|
||||
template<typename OperandType>
|
||||
OperandType
|
||||
get(Wavefront *w, int lane)
|
||||
{
|
||||
assert(sizeof(OperandType) <= sizeof(uint32_t));
|
||||
assert(regIdx < w->maxSpVgprs);
|
||||
// if OperandType is smaller than 32-bit, we truncate the value
|
||||
OperandType ret;
|
||||
uint32_t vgprIdx;
|
||||
|
||||
switch (sizeof(OperandType)) {
|
||||
case 1: // 1 byte operand
|
||||
vgprIdx = w->remap(regIdx, 1, 1);
|
||||
ret = (w->computeUnit->vrf[w->simdId]->
|
||||
read<uint32_t>(vgprIdx, lane)) & 0xff;
|
||||
break;
|
||||
case 2: // 2 byte operand
|
||||
vgprIdx = w->remap(regIdx, 2, 1);
|
||||
ret = (w->computeUnit->vrf[w->simdId]->
|
||||
read<uint32_t>(vgprIdx, lane)) & 0xffff;
|
||||
break;
|
||||
case 4: // 4 byte operand
|
||||
vgprIdx = w->remap(regIdx,sizeof(OperandType), 1);
|
||||
ret = w->computeUnit->vrf[w->simdId]->
|
||||
read<OperandType>(vgprIdx, lane);
|
||||
break;
|
||||
default:
|
||||
panic("Bad OperandType\n");
|
||||
break;
|
||||
}
|
||||
|
||||
return (OperandType)ret;
|
||||
}
|
||||
|
||||
// special get method for compatibility with LabelOperand
|
||||
uint32_t
|
||||
getTarget(Wavefront *w, int lane)
|
||||
{
|
||||
return get<uint32_t>(w, lane);
|
||||
}
|
||||
|
||||
template<typename OperandType>
|
||||
void set(Wavefront *w, int lane, OperandType &val);
|
||||
std::string disassemble();
|
||||
};
|
||||
|
||||
template<typename OperandType>
|
||||
void
|
||||
SRegOperand::set(Wavefront *w, int lane, OperandType &val)
|
||||
{
|
||||
DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n",
|
||||
w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val);
|
||||
|
||||
assert(sizeof(OperandType) == sizeof(uint32_t));
|
||||
assert(regIdx < w->maxSpVgprs);
|
||||
uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
|
||||
w->computeUnit->vrf[w->simdId]->write<OperandType>(vgprIdx,val,lane);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void
|
||||
SRegOperand::set(Wavefront *w, int lane, uint64_t &val)
|
||||
{
|
||||
DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n",
|
||||
w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val);
|
||||
|
||||
assert(regIdx < w->maxSpVgprs);
|
||||
uint32_t vgprIdx = w->remap(regIdx, sizeof(uint32_t), 1);
|
||||
w->computeUnit->vrf[w->simdId]->write<uint32_t>(vgprIdx, val, lane);
|
||||
}
|
||||
|
||||
class DRegOperand : public BaseRegOperand
|
||||
{
|
||||
public:
|
||||
static unsigned maxRegIdx;
|
||||
|
||||
bool
|
||||
init(unsigned opOffset, const BrigObject *obj)
|
||||
{
|
||||
regOperandSize = sizeof(uint64_t);
|
||||
registerType = Enums::RT_VECTOR;
|
||||
|
||||
return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'd');
|
||||
}
|
||||
|
||||
bool
|
||||
init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
|
||||
{
|
||||
regOperandSize = sizeof(uint64_t);
|
||||
registerType = Enums::RT_VECTOR;
|
||||
|
||||
return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
|
||||
'd');
|
||||
}
|
||||
|
||||
void
|
||||
initWithStrOffset(unsigned strOffset, const BrigObject *obj)
|
||||
{
|
||||
regOperandSize = sizeof(uint64_t);
|
||||
registerType = Enums::RT_VECTOR;
|
||||
|
||||
return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
|
||||
'd');
|
||||
}
|
||||
|
||||
template<typename OperandType>
|
||||
OperandType
|
||||
get(Wavefront *w, int lane)
|
||||
{
|
||||
assert(sizeof(OperandType) <= sizeof(uint64_t));
|
||||
// TODO: this check is valid only for HSAIL
|
||||
assert(regIdx < w->maxDpVgprs);
|
||||
uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
|
||||
|
||||
return w->computeUnit->vrf[w->simdId]->read<OperandType>(vgprIdx,lane);
|
||||
}
|
||||
|
||||
template<typename OperandType>
|
||||
void
|
||||
set(Wavefront *w, int lane, OperandType &val)
|
||||
{
|
||||
DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $d%d <- %d\n",
|
||||
w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx,
|
||||
val);
|
||||
|
||||
assert(sizeof(OperandType) <= sizeof(uint64_t));
|
||||
// TODO: this check is valid only for HSAIL
|
||||
assert(regIdx < w->maxDpVgprs);
|
||||
uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
|
||||
w->computeUnit->vrf[w->simdId]->write<OperandType>(vgprIdx,val,lane);
|
||||
}
|
||||
|
||||
std::string disassemble();
|
||||
};
|
||||
|
||||
class CRegOperand : public BaseRegOperand
|
||||
{
|
||||
public:
|
||||
static unsigned maxRegIdx;
|
||||
|
||||
bool
|
||||
init(unsigned opOffset, const BrigObject *obj)
|
||||
{
|
||||
regOperandSize = sizeof(uint8_t);
|
||||
registerType = Enums::RT_CONDITION;
|
||||
|
||||
return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'c');
|
||||
}
|
||||
|
||||
bool
|
||||
init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
|
||||
{
|
||||
regOperandSize = sizeof(uint8_t);
|
||||
registerType = Enums::RT_CONDITION;
|
||||
|
||||
return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
|
||||
'c');
|
||||
}
|
||||
|
||||
void
|
||||
initWithStrOffset(unsigned strOffset, const BrigObject *obj)
|
||||
{
|
||||
regOperandSize = sizeof(uint8_t);
|
||||
registerType = Enums::RT_CONDITION;
|
||||
|
||||
return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
|
||||
'c');
|
||||
}
|
||||
|
||||
template<typename OperandType>
|
||||
OperandType
|
||||
get(Wavefront *w, int lane)
|
||||
{
|
||||
assert(regIdx < w->condRegState->numRegs());
|
||||
|
||||
return w->condRegState->read<OperandType>((int)regIdx, lane);
|
||||
}
|
||||
|
||||
template<typename OperandType>
|
||||
void
|
||||
set(Wavefront *w, int lane, OperandType &val)
|
||||
{
|
||||
DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $c%d <- %d\n",
|
||||
w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx,
|
||||
val);
|
||||
|
||||
assert(regIdx < w->condRegState->numRegs());
|
||||
w->condRegState->write<OperandType>(regIdx,lane,val);
|
||||
}
|
||||
|
||||
std::string disassemble();
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
class ImmOperand : public BaseOperand
|
||||
{
|
||||
private:
|
||||
uint16_t kind;
|
||||
public:
|
||||
T bits;
|
||||
|
||||
bool init(unsigned opOffset, const BrigObject *obj);
|
||||
bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at);
|
||||
std::string disassemble();
|
||||
|
||||
template<typename OperandType>
|
||||
OperandType
|
||||
get(Wavefront *w)
|
||||
{
|
||||
assert(sizeof(OperandType) <= sizeof(T));
|
||||
panic_if(w == nullptr, "WF pointer needs to be set");
|
||||
|
||||
switch (kind) {
|
||||
// immediate operand is WF size
|
||||
case Brig::BRIG_KIND_OPERAND_WAVESIZE:
|
||||
return (OperandType)w->computeUnit->wfSize();
|
||||
break;
|
||||
|
||||
default:
|
||||
return *(OperandType*)&bits;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// This version of get() takes a WF* and a lane id for
|
||||
// compatibility with the register-based get() methods.
|
||||
template<typename OperandType>
|
||||
OperandType
|
||||
get(Wavefront *w, int lane)
|
||||
{
|
||||
return get<OperandType>(w);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
bool
|
||||
ImmOperand<T>::init(unsigned opOffset, const BrigObject *obj)
|
||||
{
|
||||
const Brig::BrigOperand *brigOp = obj->getOperand(opOffset);
|
||||
|
||||
switch (brigOp->kind) {
|
||||
// this is immediate operand
|
||||
case Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES:
|
||||
{
|
||||
DPRINTF(GPUReg, "sizeof(T): %lu, byteCount: %d\n", sizeof(T),
|
||||
brigOp->byteCount);
|
||||
|
||||
auto cbptr = (Brig::BrigOperandConstantBytes*)brigOp;
|
||||
|
||||
bits = *((T*)(obj->getData(cbptr->bytes + 4)));
|
||||
kind = brigOp->kind;
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
|
||||
case Brig::BRIG_KIND_OPERAND_WAVESIZE:
|
||||
kind = brigOp->kind;
|
||||
bits = std::numeric_limits<unsigned long long>::digits;
|
||||
return true;
|
||||
|
||||
default:
|
||||
kind = Brig::BRIG_KIND_NONE;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool
|
||||
ImmOperand<T>::init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
|
||||
{
|
||||
const Brig::BrigOperand *brigOp = obj->getOperand(opOffset);
|
||||
|
||||
if (brigOp->kind != Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
|
||||
kind = Brig::BRIG_KIND_NONE;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
const Brig::BrigOperandOperandList *brigVecOp =
|
||||
(const Brig::BrigOperandOperandList *)brigOp;
|
||||
|
||||
unsigned *data_offset =
|
||||
(unsigned *)obj->getData(brigVecOp->elements + 4 * (at + 1));
|
||||
|
||||
const Brig::BrigOperand *p =
|
||||
(const Brig::BrigOperand *)obj->getOperand(*data_offset);
|
||||
|
||||
if (p->kind != Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
|
||||
kind = Brig::BRIG_KIND_NONE;
|
||||
return false;
|
||||
}
|
||||
|
||||
return init(*data_offset, obj);
|
||||
}
|
||||
template<typename T>
|
||||
std::string
|
||||
ImmOperand<T>::disassemble()
|
||||
{
|
||||
return csprintf("0x%08x", bits);
|
||||
}
|
||||
|
||||
template<typename RegOperand, typename T>
|
||||
class RegOrImmOperand : public BaseOperand
|
||||
{
|
||||
private:
|
||||
bool is_imm;
|
||||
|
||||
public:
|
||||
void setImm(const bool value) { is_imm = value; }
|
||||
|
||||
ImmOperand<T> imm_op;
|
||||
RegOperand reg_op;
|
||||
|
||||
RegOrImmOperand() { is_imm = false; }
|
||||
void init(unsigned opOffset, const BrigObject *obj);
|
||||
void init_from_vect(unsigned opOffset, const BrigObject *obj, int at);
|
||||
std::string disassemble();
|
||||
|
||||
template<typename OperandType>
|
||||
OperandType
|
||||
get(Wavefront *w, int lane)
|
||||
{
|
||||
return is_imm ? imm_op.template get<OperandType>(w) :
|
||||
reg_op.template get<OperandType>(w, lane);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
opSize()
|
||||
{
|
||||
if (!is_imm) {
|
||||
return reg_op.opSize();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool
|
||||
isVectorRegister()
|
||||
{
|
||||
if (!is_imm) {
|
||||
return reg_op.registerType == Enums::RT_VECTOR;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
isCondRegister()
|
||||
{
|
||||
if (!is_imm) {
|
||||
return reg_op.registerType == Enums::RT_CONDITION;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
isScalarRegister()
|
||||
{
|
||||
if (!is_imm) {
|
||||
return reg_op.registerType == Enums::RT_SCALAR;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned int
|
||||
regIndex()
|
||||
{
|
||||
if (!is_imm) {
|
||||
return reg_op.regIndex();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename RegOperand, typename T>
|
||||
void
|
||||
RegOrImmOperand<RegOperand, T>::init(unsigned opOffset, const BrigObject *obj)
|
||||
{
|
||||
is_imm = false;
|
||||
|
||||
if (reg_op.init(opOffset, obj)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (imm_op.init(opOffset, obj)) {
|
||||
is_imm = true;
|
||||
return;
|
||||
}
|
||||
|
||||
fatal("RegOrImmOperand::init(): bad operand kind %d\n",
|
||||
obj->getOperand(opOffset)->kind);
|
||||
}
|
||||
|
||||
template<typename RegOperand, typename T>
|
||||
void
|
||||
RegOrImmOperand<RegOperand, T>::init_from_vect(unsigned opOffset,
|
||||
const BrigObject *obj, int at)
|
||||
{
|
||||
if (reg_op.init_from_vect(opOffset, obj, at)) {
|
||||
is_imm = false;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (imm_op.init_from_vect(opOffset, obj, at)) {
|
||||
is_imm = true;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
fatal("RegOrImmOperand::init(): bad operand kind %d\n",
|
||||
obj->getOperand(opOffset)->kind);
|
||||
}
|
||||
|
||||
template<typename RegOperand, typename T>
|
||||
std::string
|
||||
RegOrImmOperand<RegOperand, T>::disassemble()
|
||||
{
|
||||
return is_imm ? imm_op.disassemble() : reg_op.disassemble();
|
||||
}
|
||||
|
||||
// Register-or-immediate operands for the three register classes.
using SRegOrImmOperand = RegOrImmOperand<SRegOperand, uint32_t>;
using DRegOrImmOperand = RegOrImmOperand<DRegOperand, uint64_t>;
using CRegOrImmOperand = RegOrImmOperand<CRegOperand, bool>;
|
||||
|
||||
class AddrOperandBase : public BaseOperand
|
||||
{
|
||||
protected:
|
||||
// helper function for init()
|
||||
void parseAddr(const Brig::BrigOperandAddress *op, const BrigObject *obj);
|
||||
|
||||
// helper function for disassemble()
|
||||
std::string disassemble(std::string reg_disassembly);
|
||||
uint64_t calcUniformBase();
|
||||
|
||||
public:
|
||||
virtual void calcVector(Wavefront *w, std::vector<Addr> &addrVec) = 0;
|
||||
virtual uint64_t calcLane(Wavefront *w, int lane=0) = 0;
|
||||
|
||||
int64_t offset;
|
||||
const char *name = nullptr;
|
||||
StorageElement *storageElement;
|
||||
};
|
||||
|
||||
template<typename RegOperandType>
|
||||
class RegAddrOperand : public AddrOperandBase
|
||||
{
|
||||
public:
|
||||
RegOperandType reg;
|
||||
void init(unsigned opOffset, const BrigObject *obj);
|
||||
uint64_t calcUniform();
|
||||
void calcVector(Wavefront *w, std::vector<Addr> &addrVec);
|
||||
uint64_t calcLane(Wavefront *w, int lane=0);
|
||||
uint32_t opSize() { return reg.opSize(); }
|
||||
bool isVectorRegister() { return reg.registerType == Enums::RT_VECTOR; }
|
||||
bool isCondRegister() { return reg.registerType == Enums::RT_CONDITION; }
|
||||
bool isScalarRegister() { return reg.registerType == Enums::RT_SCALAR; }
|
||||
unsigned int regIndex() { return reg.regIndex(); }
|
||||
std::string disassemble();
|
||||
};
|
||||
|
||||
template<typename RegOperandType>
|
||||
void
|
||||
RegAddrOperand<RegOperandType>::init(unsigned opOffset, const BrigObject *obj)
|
||||
{
|
||||
using namespace Brig;
|
||||
|
||||
const BrigOperand *baseOp = obj->getOperand(opOffset);
|
||||
|
||||
switch (baseOp->kind) {
|
||||
case BRIG_KIND_OPERAND_ADDRESS:
|
||||
{
|
||||
const BrigOperandAddress *op = (BrigOperandAddress*)baseOp;
|
||||
storageElement = nullptr;
|
||||
|
||||
reg.init(op->reg, obj);
|
||||
|
||||
if (reg.regFileChar == 's') {
|
||||
// if the address expression is 32b, then the hi
|
||||
// bits of the offset must be set to 0 in the BRIG
|
||||
assert(!op->offset.hi);
|
||||
/**
|
||||
* the offset field of an HSAIL instruction may be negative
|
||||
* so here we cast the raw bits we get from the BRIG file to
|
||||
* a signed type to avoid address calculation errors
|
||||
*/
|
||||
offset = (int32_t)(op->offset.lo);
|
||||
reg.regOperandSize = sizeof(uint32_t);
|
||||
registerType = Enums::RT_VECTOR;
|
||||
}
|
||||
else if (reg.regFileChar == 'd') {
|
||||
offset = (int64_t)(((uint64_t)(op->offset.hi) << 32)
|
||||
| (uint64_t)(op->offset.lo));
|
||||
reg.regOperandSize = sizeof(uint64_t);
|
||||
registerType = Enums::RT_VECTOR;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
fatal("RegAddrOperand: bad operand kind %d\n", baseOp->kind);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename RegOperandType>
|
||||
uint64_t
|
||||
RegAddrOperand<RegOperandType>::calcUniform()
|
||||
{
|
||||
fatal("can't do calcUniform() on register-based address\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename RegOperandType>
|
||||
void
|
||||
RegAddrOperand<RegOperandType>::calcVector(Wavefront *w,
|
||||
std::vector<Addr> &addrVec)
|
||||
{
|
||||
Addr address = calcUniformBase();
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
|
||||
if (w->execMask(lane)) {
|
||||
if (reg.regFileChar == 's') {
|
||||
addrVec[lane] = address + reg.template get<uint32_t>(w, lane);
|
||||
} else {
|
||||
addrVec[lane] = address + reg.template get<Addr>(w, lane);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename RegOperandType>
|
||||
uint64_t
|
||||
RegAddrOperand<RegOperandType>::calcLane(Wavefront *w, int lane)
|
||||
{
|
||||
Addr address = calcUniformBase();
|
||||
|
||||
return address + reg.template get<Addr>(w, lane);
|
||||
}
|
||||
|
||||
template<typename RegOperandType>
|
||||
std::string
|
||||
RegAddrOperand<RegOperandType>::disassemble()
|
||||
{
|
||||
return AddrOperandBase::disassemble(reg.disassemble());
|
||||
}
|
||||
|
||||
// Register-based address operands for the S and D register operand classes.
using SRegAddrOperand = RegAddrOperand<SRegOperand>;
using DRegAddrOperand = RegAddrOperand<DRegOperand>;
|
||||
|
||||
class NoRegAddrOperand : public AddrOperandBase
|
||||
{
|
||||
public:
|
||||
void init(unsigned opOffset, const BrigObject *obj);
|
||||
uint64_t calcUniform();
|
||||
void calcVector(Wavefront *w, std::vector<Addr> &addrVec);
|
||||
uint64_t calcLane(Wavefront *w, int lane=0);
|
||||
std::string disassemble();
|
||||
};
|
||||
|
||||
inline uint64_t
|
||||
NoRegAddrOperand::calcUniform()
|
||||
{
|
||||
return AddrOperandBase::calcUniformBase();
|
||||
}
|
||||
|
||||
inline uint64_t
|
||||
NoRegAddrOperand::calcLane(Wavefront *w, int lane)
|
||||
{
|
||||
return calcUniform();
|
||||
}
|
||||
|
||||
inline void
|
||||
NoRegAddrOperand::calcVector(Wavefront *w, std::vector<Addr> &addrVec)
|
||||
{
|
||||
uint64_t address = calcUniformBase();
|
||||
|
||||
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane)
|
||||
addrVec[lane] = address;
|
||||
}
|
||||
|
||||
class LabelOperand : public BaseOperand
|
||||
{
|
||||
public:
|
||||
Label *label;
|
||||
|
||||
void init(unsigned opOffset, const BrigObject *obj);
|
||||
std::string disassemble();
|
||||
|
||||
// special get method for compatibility with SRegOperand
|
||||
uint32_t getTarget(Wavefront *w, int lane);
|
||||
|
||||
};
|
||||
|
||||
class ListOperand : public BaseOperand
|
||||
{
|
||||
public:
|
||||
int elementCount;
|
||||
std::vector<StorageElement*> callArgs;
|
||||
|
||||
int
|
||||
getSrcOperand(int idx)
|
||||
{
|
||||
DPRINTF(GPUReg, "getSrcOperand, idx: %d, sz_args: %d\n", idx,
|
||||
callArgs.size());
|
||||
|
||||
return callArgs.at(idx)->offset;
|
||||
}
|
||||
|
||||
void init(unsigned opOffset, const BrigObject *obj);
|
||||
|
||||
std::string disassemble();
|
||||
|
||||
template<typename OperandType>
|
||||
OperandType
|
||||
get(Wavefront *w, int lane, int arg_idx)
|
||||
{
|
||||
return w->readCallArgMem<OperandType>(lane, getSrcOperand(arg_idx));
|
||||
}
|
||||
|
||||
template<typename OperandType>
|
||||
void
|
||||
set(Wavefront *w, int lane, OperandType val)
|
||||
{
|
||||
w->writeCallArgMem<OperandType>(lane, getSrcOperand(0), val);
|
||||
DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: arg[%d] <- %d\n",
|
||||
w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane,
|
||||
getSrcOperand(0), val);
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionRefOperand : public BaseOperand
|
||||
{
|
||||
public:
|
||||
const char *func_name;
|
||||
|
||||
void init(unsigned opOffset, const BrigObject *obj);
|
||||
std::string disassemble();
|
||||
};
|
||||
|
||||
#endif // __ARCH_HSAIL_OPERAND_HH__
|
||||
@@ -1,476 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt, Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#include "gpu-compute/brig_object.hh"
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "arch/hsail/Brig.h"
|
||||
#include "base/logging.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "debug/BRIG.hh"
|
||||
#include "debug/HSAILObject.hh"
|
||||
#include "debug/HSALoader.hh"
|
||||
|
||||
using namespace Brig;
|
||||
|
||||
std::vector<std::function<HsaObject*(const std::string&, int, uint8_t*)>>
|
||||
HsaObject::tryFileFuncs = { BrigObject::tryFile };
|
||||
|
||||
extern int getBrigDataTypeBytes(BrigType16_t t);
|
||||
|
||||
const char *BrigObject::sectionNames[] =
|
||||
{
|
||||
"hsa_data",
|
||||
"hsa_code",
|
||||
"hsa_operand",
|
||||
".shstrtab"
|
||||
};
|
||||
|
||||
const char *segmentNames[] =
|
||||
{
|
||||
"none",
|
||||
"flat",
|
||||
"global",
|
||||
"readonly",
|
||||
"kernarg",
|
||||
"group",
|
||||
"private",
|
||||
"spill",
|
||||
"args"
|
||||
};
|
||||
|
||||
const uint8_t*
|
||||
BrigObject::getSectionOffset(enum SectionIndex sec, int offs) const
|
||||
{
|
||||
// allow offs == size for dummy end pointers
|
||||
assert(offs <= sectionInfo[sec].size);
|
||||
|
||||
return sectionInfo[sec].ptr + offs;
|
||||
}
|
||||
|
||||
const char*
|
||||
BrigObject::getString(int offs) const
|
||||
{
|
||||
return (const char*)(getSectionOffset(DataSectionIndex, offs) + 4);
|
||||
}
|
||||
|
||||
// View the bytes at offs in the code section as a generic BRIG entry.
const BrigBase*
BrigObject::getCodeSectionEntry(int offs) const
{
    const uint8_t *raw = getSectionOffset(CodeSectionIndex, offs);

    return (const BrigBase*)raw;
}
|
||||
|
||||
// View the bytes at offs in the data section as a BrigData entry.
const BrigData*
BrigObject::getBrigBaseData(int offs) const
{
    const uint8_t *raw = getSectionOffset(DataSectionIndex, offs);

    return (const Brig::BrigData*)raw;
}
|
||||
|
||||
const uint8_t*
|
||||
BrigObject::getData(int offs) const
|
||||
{
|
||||
return getSectionOffset(DataSectionIndex, offs);
|
||||
}
|
||||
|
||||
// View the bytes at offs in the operands section as a BRIG operand.
const BrigOperand*
BrigObject::getOperand(int offs) const
{
    const uint8_t *raw = getSectionOffset(OperandsSectionIndex, offs);

    return (const BrigOperand*)raw;
}
|
||||
|
||||
unsigned
|
||||
BrigObject::getOperandPtr(int offs, int index) const
|
||||
{
|
||||
unsigned *op_offs = (unsigned*)(getData(offs + 4 * (index + 1)));
|
||||
|
||||
return *op_offs;
|
||||
}
|
||||
|
||||
// View the bytes at offs in the code section as a BRIG instruction.
const BrigInstBase*
BrigObject::getInst(int offs) const
{
    const uint8_t *raw = getSectionOffset(CodeSectionIndex, offs);

    return (const BrigInstBase*)raw;
}
|
||||
|
||||
/**
 * Look up a kernel by name.
 *
 * The previous body was a stub that always returned nullptr, so a lookup
 * by name could never succeed. This mirrors getFunction(): the kernels
 * collected by processDirectives() are searched linearly.
 *
 * @param name kernel name as stored by processDirectives()
 * @return the matching kernel, or nullptr when none has that name
 */
HsaCode*
BrigObject::getKernel(const std::string &name) const
{
    for (const auto &kernel : kernels) {
        if (kernel->name() == name) {
            return kernel;
        }
    }

    return nullptr;
}
|
||||
|
||||
// Linear search of the recorded functions for one whose name equals
// `name`; returns nullptr when there is no match.
HsaCode*
BrigObject::getFunction(const std::string &name) const
{
    for (const auto &func : functions) {
        if (func->name() == name) {
            return func;
        }
    }

    return nullptr;
}
|
||||
|
||||
void
|
||||
BrigObject::processDirectives(const BrigBase *dirPtr, const BrigBase *endPtr,
|
||||
StorageMap *storageMap)
|
||||
{
|
||||
while (dirPtr < endPtr) {
|
||||
if (!dirPtr->byteCount) {
|
||||
fatal("Bad directive size 0\n");
|
||||
}
|
||||
|
||||
// calculate next pointer now so we can override it if needed
|
||||
const BrigBase *nextDirPtr = brigNext(dirPtr);
|
||||
|
||||
DPRINTF(HSAILObject, "Code section entry kind: #%x, byte count: %d\n",
|
||||
dirPtr->kind, dirPtr->byteCount);
|
||||
|
||||
switch (dirPtr->kind) {
|
||||
case BRIG_KIND_DIRECTIVE_FUNCTION:
|
||||
{
|
||||
const BrigDirectiveExecutable *p M5_VAR_USED =
|
||||
reinterpret_cast<const BrigDirectiveExecutable*>(dirPtr);
|
||||
|
||||
DPRINTF(HSAILObject,"DIRECTIVE_FUNCTION: %s offset: "
|
||||
"%d next: %d\n", getString(p->name),
|
||||
p->firstCodeBlockEntry, p->nextModuleEntry);
|
||||
|
||||
if (p->firstCodeBlockEntry != p->nextModuleEntry) {
|
||||
// Function calls are not supported. We allow the BRIG
|
||||
// object file to create stubs, but the function calls will
|
||||
// not work properly if the application makes use of them.
|
||||
warn("HSA function invocations are unsupported.\n");
|
||||
|
||||
const char *name = getString(p->name);
|
||||
|
||||
HsailCode *code_obj = nullptr;
|
||||
|
||||
for (int i = 0; i < functions.size(); ++i) {
|
||||
if (functions[i]->name() == name) {
|
||||
code_obj = functions[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!code_obj) {
|
||||
// create new local storage map for kernel-local symbols
|
||||
code_obj = new HsailCode(name, p, this,
|
||||
new StorageMap(storageMap));
|
||||
functions.push_back(code_obj);
|
||||
} else {
|
||||
panic("Multiple definition of Function!!: %s\n",
|
||||
getString(p->name));
|
||||
}
|
||||
}
|
||||
|
||||
nextDirPtr = getCodeSectionEntry(p->nextModuleEntry);
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_KERNEL:
|
||||
{
|
||||
const BrigDirectiveExecutable *p =
|
||||
reinterpret_cast<const BrigDirectiveExecutable*>(dirPtr);
|
||||
|
||||
DPRINTF(HSAILObject,"DIRECTIVE_KERNEL: %s offset: %d count: "
|
||||
"next: %d\n", getString(p->name),
|
||||
p->firstCodeBlockEntry, p->nextModuleEntry);
|
||||
|
||||
const char *name = getString(p->name);
|
||||
|
||||
if (name[0] == '&')
|
||||
name++;
|
||||
|
||||
std::string str = name;
|
||||
char *temp;
|
||||
int len = str.length();
|
||||
|
||||
if (str[len - 1] >= 'a' && str[len - 1] <= 'z') {
|
||||
temp = new char[str.size() + 1];
|
||||
std::copy(str.begin(), str.end() , temp);
|
||||
temp[str.size()] = '\0';
|
||||
} else {
|
||||
temp = new char[str.size()];
|
||||
std::copy(str.begin(), str.end() - 1 , temp);
|
||||
temp[str.size() - 1 ] = '\0';
|
||||
}
|
||||
|
||||
std::string kernel_name = temp;
|
||||
delete[] temp;
|
||||
|
||||
HsailCode *code_obj = nullptr;
|
||||
|
||||
for (const auto &kernel : kernels) {
|
||||
if (kernel->name() == kernel_name) {
|
||||
code_obj = kernel;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!code_obj) {
|
||||
// create new local storage map for kernel-local symbols
|
||||
code_obj = new HsailCode(kernel_name, p, this,
|
||||
new StorageMap(storageMap));
|
||||
|
||||
kernels.push_back(code_obj);
|
||||
}
|
||||
|
||||
nextDirPtr = getCodeSectionEntry(p->nextModuleEntry);
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_VARIABLE:
|
||||
{
|
||||
const BrigDirectiveVariable *p =
|
||||
reinterpret_cast<const BrigDirectiveVariable*>(dirPtr);
|
||||
|
||||
uint64_t readonlySize_old =
|
||||
storageMap->getSize(BRIG_SEGMENT_READONLY);
|
||||
|
||||
StorageElement* se = storageMap->addSymbol(p, this);
|
||||
|
||||
DPRINTF(HSAILObject, "DIRECTIVE_VARIABLE, symbol %s\n",
|
||||
getString(p->name));
|
||||
|
||||
if (p->segment == BRIG_SEGMENT_READONLY) {
|
||||
// readonly memory has initialization data
|
||||
uint8_t* readonlyData_old = readonlyData;
|
||||
|
||||
readonlyData =
|
||||
new uint8_t[storageMap->getSize(BRIG_SEGMENT_READONLY)];
|
||||
|
||||
if (p->init) {
|
||||
if ((p->type == BRIG_TYPE_ROIMG) ||
|
||||
(p->type == BRIG_TYPE_WOIMG) ||
|
||||
(p->type == BRIG_TYPE_SAMP) ||
|
||||
(p->type == BRIG_TYPE_SIG32) ||
|
||||
(p->type == BRIG_TYPE_SIG64)) {
|
||||
panic("Read only data type not supported: %s\n",
|
||||
getString(p->name));
|
||||
}
|
||||
|
||||
const BrigOperand *brigOp = getOperand(p->init);
|
||||
assert(brigOp->kind ==
|
||||
BRIG_KIND_OPERAND_CONSTANT_BYTES);
|
||||
|
||||
const Brig::BrigData *operand_data M5_VAR_USED =
|
||||
getBrigBaseData(((BrigOperandConstantBytes*)
|
||||
brigOp)->bytes);
|
||||
|
||||
assert((operand_data->byteCount / 4) > 0);
|
||||
|
||||
uint8_t *symbol_data =
|
||||
(uint8_t*)getData(((BrigOperandConstantBytes*)
|
||||
brigOp)->bytes + 4);
|
||||
|
||||
// copy the old data and add the new data
|
||||
if (readonlySize_old > 0) {
|
||||
memcpy(readonlyData, readonlyData_old,
|
||||
readonlySize_old);
|
||||
}
|
||||
|
||||
memcpy(readonlyData + se->offset, symbol_data,
|
||||
se->size);
|
||||
|
||||
delete[] readonlyData_old;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_LABEL:
|
||||
{
|
||||
const BrigDirectiveLabel M5_VAR_USED *p =
|
||||
reinterpret_cast<const BrigDirectiveLabel*>(dirPtr);
|
||||
|
||||
panic("Label directives cannot be at the module level: %s\n",
|
||||
getString(p->name));
|
||||
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_COMMENT:
|
||||
{
|
||||
const BrigDirectiveComment M5_VAR_USED *p =
|
||||
reinterpret_cast<const BrigDirectiveComment*>(dirPtr);
|
||||
|
||||
DPRINTF(HSAILObject, "DIRECTIVE_COMMENT: %s\n",
|
||||
getString(p->name));
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_LOC:
|
||||
{
|
||||
DPRINTF(HSAILObject, "BRIG_DIRECTIVE_LOC\n");
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_MODULE:
|
||||
{
|
||||
const BrigDirectiveModule M5_VAR_USED *p =
|
||||
reinterpret_cast<const BrigDirectiveModule*>(dirPtr);
|
||||
|
||||
DPRINTF(HSAILObject, "BRIG_DIRECTIVE_MODULE: %s\n",
|
||||
getString(p->name));
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_CONTROL:
|
||||
{
|
||||
DPRINTF(HSAILObject, "DIRECTIVE_CONTROL\n");
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_PRAGMA:
|
||||
{
|
||||
DPRINTF(HSAILObject, "DIRECTIVE_PRAGMA\n");
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_EXTENSION:
|
||||
{
|
||||
DPRINTF(HSAILObject, "DIRECTIVE_EXTENSION\n");
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
|
||||
{
|
||||
DPRINTF(HSAILObject, "DIRECTIVE_ARG_BLOCK_START\n");
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
|
||||
{
|
||||
DPRINTF(HSAILObject, "DIRECTIVE_ARG_BLOCK_END\n");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (dirPtr->kind >= BRIG_KIND_INST_BEGIN &&
|
||||
dirPtr->kind <= BRIG_KIND_INST_END)
|
||||
break;
|
||||
|
||||
if (dirPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
|
||||
dirPtr->kind <= BRIG_KIND_OPERAND_END)
|
||||
break;
|
||||
|
||||
warn("Unknown Brig directive kind: %d\n", dirPtr->kind);
|
||||
break;
|
||||
}
|
||||
|
||||
dirPtr = nextDirPtr;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Try to interpret a loaded file as a BRIG module.
 *
 * @param fname    file name, forwarded to the BrigObject constructor
 * @param len      number of bytes available at fileData
 * @param fileData raw file contents
 * @return a new BrigObject when the buffer starts with the BRIG
 *         identification string; nullptr otherwise, including when the
 *         buffer is null or too short to hold the identification
 */
HsaObject*
BrigObject::tryFile(const std::string &fname, int len, uint8_t *fileData)
{
    const char *brig_ident = "HSA BRIG";

    // never compare past the end of a short (or missing) buffer
    if (!fileData || len < 0 ||
        static_cast<size_t>(len) <
        static_cast<size_t>(MODULE_IDENTIFICATION_LENGTH)) {
        return nullptr;
    }

    if (memcmp(brig_ident, fileData, MODULE_IDENTIFICATION_LENGTH))
        return nullptr;

    return new BrigObject(fname, len, fileData);
}
|
||||
|
||||
/**
 * Build a BrigObject from the raw contents of a BRIG file.
 *
 * Validates the module identification, version and section count (each
 * failure is fatal), copies every section into its own buffer, processes
 * the code section's directives, and finally frees fileData, which this
 * constructor takes ownership of.
 *
 * @param fname    file name, used in diagnostics
 * @param len      length of fileData in bytes (not consulted here)
 * @param fileData heap array holding the file; deleted before returning
 */
BrigObject::BrigObject(const std::string &fname, int len, uint8_t *fileData)
    : HsaObject(fname), storageMap(new StorageMap()), labelMap(nullptr),
      currentCode(nullptr)
{
    const char *brig_ident = "HSA BRIG";
    BrigModuleHeader *mod_hdr = (BrigModuleHeader*)fileData;

    fatal_if(memcmp(brig_ident, mod_hdr, MODULE_IDENTIFICATION_LENGTH),
             "%s is not a BRIG file\n", fname);

    if (mod_hdr->brigMajor != BRIG_VERSION_BRIG_MAJOR ||
        mod_hdr->brigMinor != BRIG_VERSION_BRIG_MINOR) {
        fatal("%s: BRIG version mismatch, %d.%d != %d.%d\n",
              fname, mod_hdr->brigMajor, mod_hdr->brigMinor,
              BRIG_VERSION_BRIG_MAJOR, BRIG_VERSION_BRIG_MINOR);
    }

    fatal_if(mod_hdr->sectionCount != NumSectionIndices, "%s: BRIG section "
             "count (%d) != expected value (%d)\n", fname,
             mod_hdr->sectionCount, NumSectionIndices);

    // start from a fully defined table: no buffer and zero size per section
    for (int i = 0; i < NumSectionIndices; ++i) {
        sectionInfo[i].ptr = nullptr;
        sectionInfo[i].size = 0;
    }

    // table of file offsets, one per section header
    uint64_t *sec_idx_table = (uint64_t*)(fileData + mod_hdr->sectionIndex);
    for (int sec_idx = 0; sec_idx < mod_hdr->sectionCount; ++sec_idx) {
        uint8_t *sec_hdr_byte_ptr = fileData + sec_idx_table[sec_idx];
        BrigSectionHeader *sec_hdr = (BrigSectionHeader*)sec_hdr_byte_ptr;

        // It doesn't look like cprintf supports string precision values,
        // but if this breaks, the right answer is to fix that
        DPRINTF(HSAILObject, "found section %.*s\n", sec_hdr->nameLength,
                sec_hdr->name);

        // keep a private copy of the section, header included
        sectionInfo[sec_idx].ptr = new uint8_t[sec_hdr->byteCount];
        memcpy(sectionInfo[sec_idx].ptr, sec_hdr_byte_ptr, sec_hdr->byteCount);
        sectionInfo[sec_idx].size = sec_hdr->byteCount;
    }

    BrigSectionHeader *code_hdr =
        (BrigSectionHeader*)sectionInfo[CodeSectionIndex].ptr;

    DPRINTF(HSAILObject, "Code section hdr, count: %d, hdr count: %d, "
            "name len: %d\n", code_hdr->byteCount, code_hdr->headerByteCount,
            code_hdr->nameLength);

    // start right after the code section's own header (headerByteCount
    // bytes) and walk entries up to the end of the section
    processDirectives(getCodeSectionEntry(code_hdr->headerByteCount),
                      getCodeSectionEntry(sectionInfo[CodeSectionIndex].size),
                      storageMap);

    delete[] fileData;

    DPRINTF(HSALoader, "BRIG object %s loaded.\n", fname);
}
|
||||
|
||||
// Release the per-section buffers allocated by the constructor.
BrigObject::~BrigObject()
{
    for (auto &section : sectionInfo) {
        // delete[] on a null pointer is a no-op, so no explicit check
        delete[] section.ptr;
    }
}
|
||||
@@ -1,134 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt, Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#ifndef __BRIG_OBJECT_HH__
|
||||
#define __BRIG_OBJECT_HH__
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arch/hsail/Brig.h"
|
||||
#include "gpu-compute/hsa_object.hh"
|
||||
#include "gpu-compute/hsail_code.hh"
|
||||
|
||||
class LabelMap;
|
||||
class StorageMap;
|
||||
|
||||
/* @class BrigObject
|
||||
* this class implements the BRIG loader object, and
|
||||
* is used when the simulator directly executes HSAIL.
|
||||
* this class is responsible for extracting all
|
||||
* information about kernels contained in BRIG format
|
||||
* and converts them to HsailCode objects that are
|
||||
* usable by the simulator and emulated runtime.
|
||||
*/
|
||||
|
||||
class BrigObject final : public HsaObject
|
||||
{
|
||||
public:
|
||||
enum SectionIndex
|
||||
{
|
||||
DataSectionIndex,
|
||||
CodeSectionIndex,
|
||||
OperandsSectionIndex,
|
||||
NumSectionIndices
|
||||
};
|
||||
|
||||
static const char *sectionNames[];
|
||||
|
||||
struct SectionInfo
|
||||
{
|
||||
uint8_t *ptr;
|
||||
int size;
|
||||
};
|
||||
|
||||
static HsaObject* tryFile(const std::string &fname, int len,
|
||||
uint8_t *fileData);
|
||||
|
||||
SectionInfo sectionInfo[NumSectionIndices];
|
||||
const uint8_t *getSectionOffset(enum SectionIndex sec, int offs) const;
|
||||
|
||||
std::vector<HsailCode*> kernels;
|
||||
std::vector<HsailCode*> functions;
|
||||
std::string kern_block_name;
|
||||
|
||||
void processDirectives(const Brig::BrigBase *dirPtr,
|
||||
const Brig::BrigBase *endPtr,
|
||||
StorageMap *storageMap);
|
||||
|
||||
BrigObject(const std::string &fname, int len, uint8_t *fileData);
|
||||
~BrigObject();
|
||||
|
||||
// eventually these will need to be per-kernel not per-object-file
|
||||
StorageMap *storageMap;
|
||||
LabelMap *labelMap;
|
||||
|
||||
const char* getString(int offs) const;
|
||||
const Brig::BrigData* getBrigBaseData(int offs) const;
|
||||
const uint8_t* getData(int offs) const;
|
||||
const Brig::BrigBase* getCodeSectionEntry(int offs) const;
|
||||
const Brig::BrigOperand* getOperand(int offs) const;
|
||||
unsigned getOperandPtr(int offs, int index) const;
|
||||
const Brig::BrigInstBase* getInst(int offs) const;
|
||||
|
||||
HsaCode* getKernel(const std::string &name) const override;
|
||||
HsaCode* getFunction(const std::string &name) const override;
|
||||
|
||||
int numKernels() const override { return kernels.size(); }
|
||||
|
||||
HsaCode* getKernel(int i) const override { return kernels[i]; }
|
||||
|
||||
// pointer to the current kernel/function we're processing, so elements
|
||||
// under construction can reference it. kinda ugly, but easier
|
||||
// than passing it all over for the few places it's needed.
|
||||
mutable HsailCode *currentCode;
|
||||
};
|
||||
|
||||
// Utility function to bump Brig item pointer to next element given
|
||||
// item size in bytes. Really just an add but with lots of casting.
|
||||
template<typename T>
|
||||
T*
|
||||
brigNext(T *ptr)
|
||||
{
|
||||
Brig::BrigBase *base_ptr = (Brig::BrigBase*)ptr;
|
||||
int size = base_ptr->byteCount;
|
||||
assert(size);
|
||||
|
||||
return (T*)((uint8_t*)ptr + size);
|
||||
}
|
||||
|
||||
#endif // __BRIG_OBJECT_HH__
|
||||
@@ -1,279 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#include "gpu-compute/cl_driver.hh"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "base/intmath.hh"
|
||||
#include "cpu/thread_context.hh"
|
||||
#include "gpu-compute/dispatcher.hh"
|
||||
#include "gpu-compute/hsa_code.hh"
|
||||
#include "gpu-compute/hsa_kernel_info.hh"
|
||||
#include "gpu-compute/hsa_object.hh"
|
||||
#include "params/ClDriver.hh"
|
||||
#include "sim/process.hh"
|
||||
#include "sim/syscall_emul_buf.hh"
|
||||
|
||||
/**
 * Load every code file named in the parameters, collect the kernels they
 * contain, and precompute for each kernel its HsaKernelInfo together with
 * its offsets into the kernel-name string table and the code blob.
 */
ClDriver::ClDriver(ClDriverParams *p)
    : EmulatedDriver(p), hsaCode(0)
{
    for (const auto &codeFile : p->codefile) {
        codeFiles.push_back(&codeFile);
    }

    maxFuncArgsSize = 0;

    for (const auto &filePtr : codeFiles) {
        HsaObject *obj = HsaObject::createHsaObject(*filePtr);

        for (int k = 0; k < obj->numKernels(); ++k) {
            assert(obj->getKernel(k));
            kernels.push_back(obj->getKernel(k));
            // every kernel shares the object's readonly data
            kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData);

            // track the largest function-argument area of any kernel
            int kern_funcargs_size = kernels.back()->funcarg_size;
            if (maxFuncArgsSize < kern_funcargs_size) {
                maxFuncArgsSize = kern_funcargs_size;
            }
        }
    }

    // running offsets: names are packed with a terminating '\0' each,
    // code is packed as consecutive raw machine instructions
    int nameOffset = 0;
    int codeOffset = 0;

    for (int i = 0; i < kernels.size(); ++i) {
        HsaCode *kern = kernels[i];

        kernelInfo.push_back(HsaKernelInfo());
        kern->generateHsaKernelInfo(&kernelInfo[i]);

        kernelInfo[i].name_offs = nameOffset;
        kernelInfo[i].code_offs = codeOffset;

        nameOffset += kern->name().size() + 1;
        codeOffset += kern->numInsts() * sizeof(TheGpuISA::RawMachInst);
    }
}
|
||||
|
||||
void
|
||||
ClDriver::handshake(GpuDispatcher *_dispatcher)
|
||||
{
|
||||
dispatcher = _dispatcher;
|
||||
dispatcher->setFuncargsSize(maxFuncArgsSize);
|
||||
}
|
||||
|
||||
int
|
||||
ClDriver::open(ThreadContext *tc, int mode, int flags)
|
||||
{
|
||||
auto p = tc->getProcessPtr();
|
||||
std::shared_ptr<DeviceFDEntry> fdp;
|
||||
fdp = std::make_shared<DeviceFDEntry>(this, filename);
|
||||
int tgt_fd = p->fds->allocFD(fdp);
|
||||
return tgt_fd;
|
||||
}
|
||||
|
||||
int
|
||||
ClDriver::ioctl(ThreadContext *tc, unsigned req, Addr buf_addr)
|
||||
{
|
||||
switch (req) {
|
||||
case HSA_GET_SIZES:
|
||||
{
|
||||
TypedBufferArg<HsaDriverSizes> sizes(buf_addr);
|
||||
sizes->num_kernels = kernels.size();
|
||||
sizes->string_table_size = 0;
|
||||
sizes->code_size = 0;
|
||||
sizes->readonly_size = 0;
|
||||
|
||||
if (kernels.size() > 0) {
|
||||
// all kernels will share the same read-only memory
|
||||
sizes->readonly_size =
|
||||
kernels[0]->getSize(HsaCode::MemorySegment::READONLY);
|
||||
// check our assumption
|
||||
for (int i = 1; i<kernels.size(); ++i) {
|
||||
assert(sizes->readonly_size ==
|
||||
kernels[i]->getSize(HsaCode::MemorySegment::READONLY));
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < kernels.size(); ++i) {
|
||||
HsaCode *k = kernels[i];
|
||||
// add one for terminating '\0'
|
||||
sizes->string_table_size += k->name().size() + 1;
|
||||
sizes->code_size +=
|
||||
k->numInsts() * sizeof(TheGpuISA::RawMachInst);
|
||||
}
|
||||
|
||||
sizes.copyOut(tc->getVirtProxy());
|
||||
}
|
||||
break;
|
||||
|
||||
case HSA_GET_KINFO:
|
||||
{
|
||||
TypedBufferArg<HsaKernelInfo>
|
||||
kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size());
|
||||
|
||||
for (int i = 0; i < kernels.size(); ++i) {
|
||||
HsaKernelInfo *ki = &kinfo[i];
|
||||
ki->name_offs = kernelInfo[i].name_offs;
|
||||
ki->code_offs = kernelInfo[i].code_offs;
|
||||
ki->sRegCount = kernelInfo[i].sRegCount;
|
||||
ki->dRegCount = kernelInfo[i].dRegCount;
|
||||
ki->cRegCount = kernelInfo[i].cRegCount;
|
||||
ki->static_lds_size = kernelInfo[i].static_lds_size;
|
||||
ki->private_mem_size = kernelInfo[i].private_mem_size;
|
||||
ki->spill_mem_size = kernelInfo[i].spill_mem_size;
|
||||
}
|
||||
|
||||
kinfo.copyOut(tc->getVirtProxy());
|
||||
}
|
||||
break;
|
||||
|
||||
case HSA_GET_STRINGS:
|
||||
{
|
||||
int string_table_size = 0;
|
||||
for (int i = 0; i < kernels.size(); ++i) {
|
||||
HsaCode *k = kernels[i];
|
||||
string_table_size += k->name().size() + 1;
|
||||
}
|
||||
|
||||
BufferArg buf(buf_addr, string_table_size);
|
||||
char *bufp = (char*)buf.bufferPtr();
|
||||
|
||||
for (int i = 0; i < kernels.size(); ++i) {
|
||||
HsaCode *k = kernels[i];
|
||||
const char *n = k->name().c_str();
|
||||
|
||||
// idiomatic string copy
|
||||
while ((*bufp++ = *n++));
|
||||
}
|
||||
|
||||
assert(bufp - (char *)buf.bufferPtr() == string_table_size);
|
||||
|
||||
buf.copyOut(tc->getVirtProxy());
|
||||
}
|
||||
break;
|
||||
|
||||
case HSA_GET_READONLY_DATA:
|
||||
{
|
||||
// we can pick any kernel --- they share the same
|
||||
// readonly segment (this assumption is checked in GET_SIZES)
|
||||
uint64_t size =
|
||||
kernels.back()->getSize(HsaCode::MemorySegment::READONLY);
|
||||
BufferArg data(buf_addr, size);
|
||||
char *datap = (char *)data.bufferPtr();
|
||||
memcpy(datap,
|
||||
kernels.back()->readonly_data,
|
||||
size);
|
||||
data.copyOut(tc->getVirtProxy());
|
||||
}
|
||||
break;
|
||||
|
||||
case HSA_GET_CODE:
|
||||
{
|
||||
// set hsaCode pointer
|
||||
hsaCode = buf_addr;
|
||||
int code_size = 0;
|
||||
|
||||
for (int i = 0; i < kernels.size(); ++i) {
|
||||
HsaCode *k = kernels[i];
|
||||
code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
|
||||
}
|
||||
|
||||
TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size);
|
||||
TheGpuISA::RawMachInst *bufp = buf;
|
||||
|
||||
int buf_idx = 0;
|
||||
|
||||
for (int i = 0; i < kernels.size(); ++i) {
|
||||
HsaCode *k = kernels[i];
|
||||
|
||||
for (int j = 0; j < k->numInsts(); ++j) {
|
||||
bufp[buf_idx] = k->insts()->at(j);
|
||||
++buf_idx;
|
||||
}
|
||||
}
|
||||
|
||||
buf.copyOut(tc->getVirtProxy());
|
||||
}
|
||||
break;
|
||||
|
||||
case HSA_GET_CU_CNT:
|
||||
{
|
||||
BufferArg buf(buf_addr, sizeof(uint32_t));
|
||||
*((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs();
|
||||
buf.copyOut(tc->getVirtProxy());
|
||||
}
|
||||
break;
|
||||
|
||||
case HSA_GET_VSZ:
|
||||
{
|
||||
BufferArg buf(buf_addr, sizeof(uint32_t));
|
||||
*((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize();
|
||||
buf.copyOut(tc->getVirtProxy());
|
||||
}
|
||||
break;
|
||||
case HSA_GET_HW_STATIC_CONTEXT_SIZE:
|
||||
{
|
||||
BufferArg buf(buf_addr, sizeof(uint32_t));
|
||||
*((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize();
|
||||
buf.copyOut(tc->getVirtProxy());
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
fatal("ClDriver: bad ioctl %d\n", req);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char*
|
||||
ClDriver::codeOffToKernelName(uint64_t code_ptr)
|
||||
{
|
||||
assert(hsaCode);
|
||||
uint32_t code_offs = code_ptr - hsaCode;
|
||||
|
||||
for (int i = 0; i < kernels.size(); ++i) {
|
||||
if (code_offs == kernelInfo[i].code_offs) {
|
||||
return kernels[i]->name().c_str();
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
 * Params factory hook: build the ClDriver described by this params object.
 *
 * @return a newly heap-allocated ClDriver constructed from these params
 */
ClDriver*
ClDriverParams::create()
{
    ClDriver *driver = new ClDriver(this);
    return driver;
}
|
||||
@@ -1,77 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#ifndef __CL_DRIVER_HH__
|
||||
#define __CL_DRIVER_HH__
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "gpu-compute/hsa_kernel_info.hh"
|
||||
#include "sim/emul_driver.hh"
|
||||
|
||||
class GpuDispatcher;
|
||||
class HsaCode;
|
||||
class Process;
|
||||
class ThreadContext;
|
||||
|
||||
struct ClDriverParams;
|
||||
|
||||
class ClDriver final : public EmulatedDriver
|
||||
{
|
||||
public:
|
||||
ClDriver(ClDriverParams *p);
|
||||
void handshake(GpuDispatcher *_dispatcher);
|
||||
int open(ThreadContext *tc, int mode, int flags);
|
||||
int ioctl(ThreadContext *tc, unsigned req, Addr buf);
|
||||
const char* codeOffToKernelName(uint64_t code_ptr);
|
||||
|
||||
private:
|
||||
GpuDispatcher *dispatcher;
|
||||
|
||||
std::vector<const std::string*> codeFiles;
|
||||
|
||||
// All the kernels we know about
|
||||
std::vector<HsaCode*> kernels;
|
||||
std::vector<HsaCode*> functions;
|
||||
|
||||
std::vector<HsaKernelInfo> kernelInfo;
|
||||
|
||||
// maximum size necessary for function arguments
|
||||
int maxFuncArgsSize;
|
||||
// The host virtual address for the kernel code
|
||||
uint64_t hsaCode;
|
||||
};
|
||||
|
||||
#endif // __CL_DRIVER_HH__
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __GPU_CL_EVENT_HH__
|
||||
#define __GPU_CL_EVENT_HH__
|
||||
|
||||
struct HsaQueueEntry;

// Event record associated with an HSA task. A freshly constructed event is
// not done, refers to no task, and has both of its counters zeroed.
class _cl_event {
  public:
    // all state comes from the default member initializers below
    _cl_event() { }

    // completion flag
    volatile bool done = false;
    // task this event belongs to (nullptr until one is attached)
    HsaQueueEntry *hsaTaskPtr = nullptr;
    // NOTE(review): presumably start/end timestamps of the task -- confirm
    uint64_t start = 0;
    uint64_t end = 0;
};
|
||||
|
||||
#endif // __GPU_CL_EVENT_HH__
|
||||
@@ -1,83 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: John Kalamatianos
|
||||
*/
|
||||
|
||||
#include "gpu-compute/condition_register_state.hh"
|
||||
|
||||
#include "gpu-compute/compute_unit.hh"
|
||||
#include "gpu-compute/gpu_static_inst.hh"
|
||||
#include "gpu-compute/shader.hh"
|
||||
#include "gpu-compute/wavefront.hh"
|
||||
|
||||
/**
 * Build an empty condition register state: no parent compute unit and no
 * registers. init() sizes the register storage afterwards.
 */
ConditionRegisterState::ConditionRegisterState()
    : computeUnit(nullptr), _name(), c_reg(), busy()
{
    // value-initialised vectors are already empty, so nothing to clear
}
|
||||
|
||||
void
|
||||
ConditionRegisterState::setParent(ComputeUnit *_computeUnit)
|
||||
{
|
||||
computeUnit = _computeUnit;
|
||||
_name = computeUnit->name() + ".CondRegState";
|
||||
}
|
||||
|
||||
void
|
||||
ConditionRegisterState::init(uint32_t _size)
|
||||
{
|
||||
c_reg.resize(_size);
|
||||
busy.resize(_size, 0);
|
||||
}
|
||||
|
||||
void
|
||||
ConditionRegisterState::exec(GPUDynInstPtr ii, Wavefront *w)
|
||||
{
|
||||
// iterate over all operands
|
||||
for (auto i = 0; i < ii->getNumOperands(); ++i) {
|
||||
// is this a condition register destination operand?
|
||||
if (ii->isCondRegister(i) && ii->isDstOperand(i)) {
|
||||
// mark the register as busy
|
||||
markReg(ii->getRegisterIndex(i, ii), 1);
|
||||
uint32_t pipeLen = w->computeUnit->spBypassLength();
|
||||
|
||||
// schedule an event for marking the register as ready
|
||||
w->computeUnit->
|
||||
registerEvent(w->simdId, ii->getRegisterIndex(i, ii),
|
||||
ii->getOperandSize(i),
|
||||
w->computeUnit->shader->tick_cnt +
|
||||
w->computeUnit->shader->ticks(pipeLen), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,101 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: John Kalamatianos
|
||||
*/
|
||||
|
||||
#ifndef __CONDITION_REGISTER_STATE_HH__
|
||||
#define __CONDITION_REGISTER_STATE_HH__
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "gpu-compute/misc.hh"
|
||||
|
||||
class ComputeUnit;
|
||||
class GPUStaticInst;
|
||||
class Shader;
|
||||
class Wavefront;
|
||||
|
||||
// Condition Register State (used only when executing HSAIL)
|
||||
class ConditionRegisterState
|
||||
{
|
||||
public:
|
||||
ConditionRegisterState();
|
||||
void init(uint32_t _size);
|
||||
const std::string name() const { return _name; }
|
||||
void setParent(ComputeUnit *_computeUnit);
|
||||
void regStats() { }
|
||||
|
||||
template<typename T>
|
||||
T
|
||||
read(int regIdx, int threadId)
|
||||
{
|
||||
bool tmp = c_reg[regIdx][threadId];
|
||||
T *p0 = (T*)(&tmp);
|
||||
|
||||
return *p0;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
write(int regIdx, int threadId, T value)
|
||||
{
|
||||
c_reg[regIdx][threadId] = (bool)(value & 0x01);
|
||||
}
|
||||
|
||||
void
|
||||
markReg(int regIdx, uint8_t value)
|
||||
{
|
||||
busy.at(regIdx) = value;
|
||||
}
|
||||
|
||||
uint8_t
|
||||
regBusy(int idx)
|
||||
{
|
||||
uint8_t status = busy.at(idx);
|
||||
return status;
|
||||
}
|
||||
|
||||
int numRegs() { return c_reg.size(); }
|
||||
void exec(GPUDynInstPtr ii, Wavefront *w);
|
||||
|
||||
private:
|
||||
ComputeUnit* computeUnit;
|
||||
std::string _name;
|
||||
// Condition Register state
|
||||
std::vector<VectorMask> c_reg;
|
||||
// flag indicating if a register is busy
|
||||
std::vector<uint8_t> busy;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,101 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#ifndef __HSA_CODE_HH__
|
||||
#define __HSA_CODE_HH__
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arch/gpu_types.hh"
|
||||
#include "config/the_gpu_isa.hh"
|
||||
|
||||
class HsaKernelInfo;
|
||||
|
||||
/* @class HsaCode
|
||||
* base code object for the set of HSA kernels associated
|
||||
* with a single application. this class provides the common
|
||||
* methods for creating, accessing, and storing information
|
||||
* about kernel and variable symbols, symbol name, memory
|
||||
* segment sizes, and instruction count, etc.
|
||||
*/
|
||||
|
||||
class HsaCode
|
||||
{
|
||||
public:
|
||||
HsaCode(const std::string &name) : readonly_data(nullptr), funcarg_size(0),
|
||||
_name(name)
|
||||
{
|
||||
}
|
||||
|
||||
enum class MemorySegment {
|
||||
NONE,
|
||||
FLAT,
|
||||
GLOBAL,
|
||||
READONLY,
|
||||
KERNARG,
|
||||
GROUP,
|
||||
PRIVATE,
|
||||
SPILL,
|
||||
ARG,
|
||||
EXTSPACE0
|
||||
};
|
||||
|
||||
const std::string& name() const { return _name; }
|
||||
int numInsts() const { return _insts.size(); }
|
||||
std::vector<TheGpuISA::RawMachInst>* insts() { return &_insts; }
|
||||
|
||||
void
|
||||
setReadonlyData(uint8_t *_readonly_data)
|
||||
{
|
||||
readonly_data = _readonly_data;
|
||||
}
|
||||
|
||||
virtual int getSize(MemorySegment segment) const = 0;
|
||||
virtual void generateHsaKernelInfo(HsaKernelInfo *hsaKernelInfo) const = 0;
|
||||
|
||||
uint8_t *readonly_data;
|
||||
int funcarg_size;
|
||||
|
||||
protected:
|
||||
// An array that stores instruction indices (0 through kernel size)
|
||||
// for a kernel passed to code object constructor as an argument.
|
||||
std::vector<TheGpuISA::RawMachInst> _insts;
|
||||
|
||||
private:
|
||||
const std::string _name;
|
||||
};
|
||||
|
||||
#endif // __HSA_CODE_HH__
|
||||
@@ -1,80 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt
|
||||
*/
|
||||
|
||||
#ifndef __HSA_KERNEL_INFO_HH__
|
||||
#define __HSA_KERNEL_INFO_HH__
|
||||
|
||||
// This file defines the public interface between the HSA emulated
|
||||
// driver and application programs.
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// ioctl request codes understood by the HSA emulated driver
static constexpr int HSA_GET_SIZES = 0x4801;
static constexpr int HSA_GET_KINFO = 0x4802;
static constexpr int HSA_GET_STRINGS = 0x4803;
static constexpr int HSA_GET_CODE = 0x4804;
static constexpr int HSA_GET_READONLY_DATA = 0x4805;
static constexpr int HSA_GET_CU_CNT = 0x4806;
static constexpr int HSA_GET_VSZ = 0x4807;
static constexpr int HSA_GET_HW_STATIC_CONTEXT_SIZE = 0x4808;
|
||||
|
||||
// Return value (via buffer ptr) for HSA_GET_SIZES. The layout is shared
// with application programs, so field order and widths must not change.
struct HsaDriverSizes
{
    // number of kernels known to the driver
    uint32_t num_kernels;
    // bytes needed for all kernel names, each with its terminating '\0'
    uint32_t string_table_size;
    // bytes needed for the instructions of all kernels
    uint32_t code_size;
    // bytes in the readonly segment
    uint32_t readonly_size;
};
|
||||
|
||||
// HSA_GET_KINFO returns an array of num_kernels of these structs. The
// layout is shared with application programs, so field order and widths
// must not change.
struct HsaKernelInfo
{
    // byte offset into string table
    uint32_t name_offs;
    // byte offset into code array
    uint32_t code_offs;

    // per-kernel memory requirements
    uint32_t static_lds_size;
    uint32_t private_mem_size;
    uint32_t spill_mem_size;

    // Number of s registers
    uint32_t sRegCount;
    // Number of d registers
    uint32_t dRegCount;
    // Number of c registers
    uint32_t cRegCount;
};
|
||||
|
||||
#endif // __HSA_KERNEL_INFO_HH__
|
||||
@@ -1,77 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#include "gpu-compute/hsa_object.hh"
|
||||
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
|
||||
#include "base/logging.hh"
|
||||
|
||||
HsaObject::HsaObject(const std::string &fname)
|
||||
: readonlyData(nullptr), filename(fname)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Load an HSA object file and hand it to the registered loaders.
 *
 * The whole file is read into a heap buffer and each entry of tryFileFuncs
 * is tried in turn; the first loader that returns an object wins.
 *
 * I/O problems are reported with fatal() rather than assert(), so they are
 * still caught when assertions are compiled out (NDEBUG); previously a
 * failed open led to an allocation sized by tellg()'s -1 error value.
 *
 * @param fname path of the HSA object file
 * @return the object created by the first loader that accepts the file;
 *         never returns if the file cannot be read or no loader accepts it
 */
HsaObject*
HsaObject::createHsaObject(const std::string &fname)
{
    // open positioned at the end so tellg() yields the file size
    std::ifstream code_file(fname, std::ifstream::ate | std::ifstream::in |
                            std::ifstream::binary);

    if (!code_file.is_open() || !code_file.good()) {
        fatal("Unable to open HSA object file: %s.\n", fname);
    }

    const std::streamoff file_end = code_file.tellg();
    // the loaders take the length as an int; make sure it fits
    const int file_length = static_cast<int>(file_end);

    if (file_end < 0 || file_length != file_end) {
        fatal("Unable to determine size of HSA object file: %s.\n", fname);
    }

    code_file.seekg(0, code_file.beg);

    uint8_t *file_data = new uint8_t[file_length];
    code_file.read(reinterpret_cast<char*>(file_data), file_length);

    if (!code_file) {
        delete[] file_data;
        fatal("Unable to read HSA object file: %s.\n", fname);
    }

    code_file.close();

    for (const auto &tryFile : tryFileFuncs) {
        // NOTE(review): presumably the created object keeps using
        // file_data, which is why it is not freed on success -- confirm
        if (HsaObject *hsaObj = tryFile(fname, file_length, file_data)) {
            return hsaObj;
        }
    }

    delete[] file_data;
    fatal("Unknown HSA object type for file: %s.\n", fname);

    // not reached; keeps compilers that cannot see fatal() is noreturn quiet
    return nullptr;
}
|
||||
@@ -1,74 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#ifndef __HSA_OBJECT_HH__
|
||||
#define __HSA_OBJECT_HH__
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class HsaCode;

/**
 * @class HsaObject
 * base loader object for HSA kernels. this class provides
 * the base method definitions for loading, storing, and
 * accessing HSA kernel objects into the simulator.
 */

class HsaObject
{
  public:
    HsaObject(const std::string &fileName);

    // Loaders are created and used through HsaObject pointers (see
    // createHsaObject()); a virtual destructor makes destroying a derived
    // loader through such a pointer well defined.
    virtual ~HsaObject() = default;

    // Read the file and return the object built by the first registered
    // loader that accepts it.
    static HsaObject* createHsaObject(const std::string &fname);

    // Registered loader functions; each receives the file name, the file
    // length and the file contents, and returns nullptr when it does not
    // recognise the file.
    static std::vector<std::function<HsaObject*(const std::string&, int,
                                                uint8_t*)>> tryFileFuncs;

    virtual HsaCode* getKernel(const std::string &name) const = 0;
    virtual HsaCode* getKernel(int i) const = 0;
    virtual HsaCode* getFunction(const std::string &name) const = 0;
    virtual int numKernels() const = 0;

    // Name of the file this object was created from.
    const std::string& name() const { return filename; }

    uint8_t *readonlyData;

  protected:
    const std::string filename;
};
|
||||
|
||||
#endif // __HSA_OBJECT_HH__
|
||||
@@ -1,460 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt
|
||||
*/
|
||||
|
||||
#include "gpu-compute/hsail_code.hh"
|
||||
|
||||
#include "arch/gpu_types.hh"
|
||||
#include "arch/hsail/Brig.h"
|
||||
#include "arch/hsail/operand.hh"
|
||||
#include "config/the_gpu_isa.hh"
|
||||
#include "debug/BRIG.hh"
|
||||
#include "debug/HSAILObject.hh"
|
||||
#include "gpu-compute/brig_object.hh"
|
||||
#include "gpu-compute/gpu_static_inst.hh"
|
||||
#include "gpu-compute/kernel_cfg.hh"
|
||||
|
||||
using namespace Brig;
|
||||
|
||||
int getBrigDataTypeBytes(BrigType16_t t);
|
||||
|
||||
/**
 * Create an empty code object with the given name. Both segment sizes
 * start at -1, i.e. they have not been set yet.
 *
 * @param name_str name of the kernel or function
 */
HsailCode::HsailCode(const std::string &name_str)
    : HsaCode(name_str),
      private_size(-1),
      readonly_size(-1)
{}
|
||||
|
||||
void
|
||||
HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
|
||||
StorageMap *objStorageMap)
|
||||
{
|
||||
storageMap = objStorageMap;
|
||||
|
||||
// set pointer so that decoding process can find this kernel context when
|
||||
// needed
|
||||
obj->currentCode = this;
|
||||
|
||||
if (code_dir->base.kind != BRIG_KIND_DIRECTIVE_FUNCTION &&
|
||||
code_dir->base.kind != BRIG_KIND_DIRECTIVE_KERNEL) {
|
||||
fatal("unexpected directive kind %d inside kernel/function init\n",
|
||||
code_dir->base.kind);
|
||||
}
|
||||
|
||||
DPRINTF(HSAILObject, "Initializing code, first code block entry is: %d\n",
|
||||
code_dir->firstCodeBlockEntry);
|
||||
|
||||
// clear these static vars so we can properly track the max index
|
||||
// for this kernel
|
||||
SRegOperand::maxRegIdx = 0;
|
||||
DRegOperand::maxRegIdx = 0;
|
||||
CRegOperand::maxRegIdx = 0;
|
||||
setPrivateSize(0);
|
||||
|
||||
const BrigBase *entryPtr = brigNext((BrigBase*)code_dir);
|
||||
const BrigBase *endPtr =
|
||||
obj->getCodeSectionEntry(code_dir->nextModuleEntry);
|
||||
|
||||
// the instruction's byte address (relative to the base addr
|
||||
// of the code section)
|
||||
int inst_addr = 0;
|
||||
// the index that points to the instruction in the instruction
|
||||
// array
|
||||
int inst_idx = 0;
|
||||
std::vector<GPUStaticInst*> instructions;
|
||||
int funcarg_size_scope = 0;
|
||||
|
||||
// walk through instructions in code section and directives in
|
||||
// directive section in parallel, processing directives that apply
|
||||
// when we reach the relevant code point.
|
||||
while (entryPtr < endPtr) {
|
||||
switch (entryPtr->kind) {
|
||||
case BRIG_KIND_DIRECTIVE_VARIABLE:
|
||||
{
|
||||
const BrigDirectiveVariable *sym =
|
||||
(const BrigDirectiveVariable*)entryPtr;
|
||||
|
||||
DPRINTF(HSAILObject,"Initializing code, directive is "
|
||||
"kind_variable, symbol is: %s\n",
|
||||
obj->getString(sym->name));
|
||||
|
||||
StorageElement *se = storageMap->addSymbol(sym, obj);
|
||||
|
||||
if (sym->segment == BRIG_SEGMENT_PRIVATE) {
|
||||
setPrivateSize(se->size);
|
||||
} else { // spill
|
||||
funcarg_size_scope += se->size;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_LABEL:
|
||||
{
|
||||
const BrigDirectiveLabel *lbl =
|
||||
(const BrigDirectiveLabel*)entryPtr;
|
||||
|
||||
DPRINTF(HSAILObject,"Initializing code, directive is "
|
||||
"kind_label, label is: %s \n",
|
||||
obj->getString(lbl->name));
|
||||
|
||||
labelMap.addLabel(lbl, inst_addr, obj);
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_PRAGMA:
|
||||
{
|
||||
DPRINTF(HSAILObject, "Initializing code, directive "
|
||||
"is kind_pragma\n");
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_COMMENT:
|
||||
{
|
||||
DPRINTF(HSAILObject, "Initializing code, directive is "
|
||||
"kind_comment\n");
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
|
||||
{
|
||||
DPRINTF(HSAILObject, "Initializing code, directive is "
|
||||
"kind_arg_block_start\n");
|
||||
|
||||
storageMap->resetOffset(BRIG_SEGMENT_ARG);
|
||||
funcarg_size_scope = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
|
||||
{
|
||||
DPRINTF(HSAILObject, "Initializing code, directive is "
|
||||
"kind_arg_block_end\n");
|
||||
|
||||
funcarg_size = funcarg_size < funcarg_size_scope ?
|
||||
funcarg_size_scope : funcarg_size;
|
||||
}
|
||||
break;
|
||||
|
||||
case BRIG_KIND_DIRECTIVE_END:
|
||||
DPRINTF(HSAILObject, "Initializing code, dircetive is "
|
||||
"kind_end\n");
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
if (entryPtr->kind >= BRIG_KIND_INST_BEGIN &&
|
||||
entryPtr->kind <= BRIG_KIND_INST_END) {
|
||||
|
||||
BrigInstBase *instPtr = (BrigInstBase*)entryPtr;
|
||||
TheGpuISA::MachInst machInst = { instPtr, obj };
|
||||
GPUStaticInst *iptr = decoder.decode(machInst);
|
||||
|
||||
if (iptr) {
|
||||
DPRINTF(HSAILObject, "Initializing code, processing inst "
|
||||
"byte addr #%d idx %d: OPCODE=%d\n", inst_addr,
|
||||
inst_idx, instPtr->opcode);
|
||||
|
||||
TheGpuISA::RawMachInst raw_inst = decoder.saveInst(iptr);
|
||||
iptr->instNum(inst_idx);
|
||||
iptr->instAddr(inst_addr);
|
||||
_insts.push_back(raw_inst);
|
||||
instructions.push_back(iptr);
|
||||
}
|
||||
inst_addr += sizeof(TheGpuISA::RawMachInst);
|
||||
++inst_idx;
|
||||
} else if (entryPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
|
||||
entryPtr->kind < BRIG_KIND_OPERAND_END) {
|
||||
warn("unexpected operand entry in code segment\n");
|
||||
} else {
|
||||
// there are surely some more cases we will need to handle,
|
||||
// but we'll deal with them as we find them.
|
||||
fatal("unexpected directive kind %d inside kernel scope\n",
|
||||
entryPtr->kind);
|
||||
}
|
||||
}
|
||||
|
||||
entryPtr = brigNext(entryPtr);
|
||||
}
|
||||
|
||||
// compute Control Flow Graph for current kernel
|
||||
ControlFlowInfo::assignImmediatePostDominators(instructions);
|
||||
|
||||
max_sreg = SRegOperand::maxRegIdx;
|
||||
max_dreg = DRegOperand::maxRegIdx;
|
||||
max_creg = CRegOperand::maxRegIdx;
|
||||
|
||||
obj->currentCode = nullptr;
|
||||
}
|
||||
|
||||
// Full constructor: records the name, starts both segment sizes at -1 and
// immediately loads the code body by delegating to init().
HsailCode::HsailCode(const std::string &name_str,
                     const BrigDirectiveExecutable *code_dir,
                     const BrigObject *obj, StorageMap *objStorageMap)
    : HsaCode(name_str),
      private_size(-1),
      readonly_size(-1)
{
    init(code_dir, obj, objStorageMap);
}
|
||||
|
||||
void
|
||||
LabelMap::addLabel(const Brig::BrigDirectiveLabel *lblDir, int inst_index,
|
||||
const BrigObject *obj)
|
||||
{
|
||||
std::string lbl_name = obj->getString(lblDir->name);
|
||||
Label &lbl = map[lbl_name];
|
||||
|
||||
if (lbl.defined()) {
|
||||
fatal("Attempt to redefine existing label %s\n", lbl_name);
|
||||
}
|
||||
|
||||
lbl.define(lbl_name, inst_index);
|
||||
DPRINTF(HSAILObject, "label %s = %d\n", lbl_name, inst_index);
|
||||
}
|
||||
|
||||
// Return the Label for the name in lblDir, creating an undefined entry if
// it has not been seen yet, and make sure the entry carries that name.
Label*
LabelMap::refLabel(const Brig::BrigDirectiveLabel *lblDir,
                   const BrigObject *obj)
{
    std::string label_name = obj->getString(lblDir->name);

    Label *entry = &map[label_name];
    entry->checkName(label_name);
    return entry;
}
|
||||
|
||||
int
|
||||
getBrigDataTypeBytes(BrigType16_t t)
|
||||
{
|
||||
switch (t) {
|
||||
case BRIG_TYPE_S8:
|
||||
case BRIG_TYPE_U8:
|
||||
case BRIG_TYPE_B8:
|
||||
return 1;
|
||||
|
||||
case BRIG_TYPE_S16:
|
||||
case BRIG_TYPE_U16:
|
||||
case BRIG_TYPE_B16:
|
||||
case BRIG_TYPE_F16:
|
||||
return 2;
|
||||
|
||||
case BRIG_TYPE_S32:
|
||||
case BRIG_TYPE_U32:
|
||||
case BRIG_TYPE_B32:
|
||||
case BRIG_TYPE_F32:
|
||||
return 4;
|
||||
|
||||
case BRIG_TYPE_S64:
|
||||
case BRIG_TYPE_U64:
|
||||
case BRIG_TYPE_B64:
|
||||
case BRIG_TYPE_F64:
|
||||
return 8;
|
||||
|
||||
case BRIG_TYPE_B1:
|
||||
|
||||
default:
|
||||
fatal("unhandled symbol data type %d", t);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Reserve room for a variable in this space and register it in all three
// lookup structures (list, address range map, directive pointer map).
// The offset is nextOffset rounded up to the element size; for arrays the
// size is the element size multiplied by the 64-bit dimension.
StorageElement*
StorageSpace::addSymbol(const BrigDirectiveVariable *sym,
                        const BrigObject *obj)
{
    const char *sym_name = obj->getString(sym->name);

    const bool is_array = sym->type & BRIG_TYPE_ARRAY;
    const int elem_bytes = is_array ?
        getBrigDataTypeBytes(sym->type & ~BRIG_TYPE_ARRAY) :
        getBrigDataTypeBytes(sym->type);

    uint64_t size = elem_bytes;
    if (is_array) {
        // dim is stored as two 32-bit halves
        size *= (((uint64_t)sym->dim.hi) << 32 | (uint64_t)sym->dim.lo);
    }

    uint64_t offset = roundUp(nextOffset, elem_bytes);
    nextOffset = offset + size;

    DPRINTF(HSAILObject, "Adding SYMBOL %s size %d offset %#x, init: %d\n",
            sym_name, size, offset, sym->init);

    StorageElement *elem = new StorageElement(sym_name, offset, size, sym);
    elements.push_back(elem);
    elements_by_addr.insert(AddrRange(offset, offset + size - 1), elem);
    elements_by_brigptr[sym] = elem;

    return elem;
}
|
||||
|
||||
// Linear search of this space for the first element whose name matches;
// returns nullptr when there is none.
StorageElement*
StorageSpace::findSymbol(std::string name)
{
    for (auto cur = elements.begin(); cur != elements.end(); ++cur) {
        StorageElement *elem = *cur;
        if (elem->name == name)
            return elem;
    }

    return nullptr;
}
|
||||
|
||||
// Look up the element whose address range contains addr; nullptr if no
// range matches. The space is asserted to be non-empty.
StorageElement*
StorageSpace::findSymbol(uint64_t addr)
{
    assert(elements_by_addr.size() > 0);

    auto match = elements_by_addr.contains(addr);
    return match != elements_by_addr.end() ? match->second : nullptr;
}
|
||||
|
||||
// Look up the element registered for a variable directive pointer;
// nullptr if it is unknown. The space is asserted to be non-empty.
StorageElement*
StorageSpace::findSymbol(const BrigDirectiveVariable *brigptr)
{
    assert(elements_by_brigptr.size() > 0);

    auto match = elements_by_brigptr.find(brigptr);
    return match != elements_by_brigptr.end() ? match->second : nullptr;
}
|
||||
|
||||
// Build a storage map nested inside outerScope (which may be null) and
// allocate one StorageSpace for each of the NumSegments segments.
StorageMap::StorageMap(StorageMap *outerScope)
    : outerScopeMap(outerScope)
{
    int seg = 0;
    while (seg < NumSegments) {
        space[seg] = new StorageSpace((BrigSegment)seg);
        ++seg;
    }
}
|
||||
|
||||
// Add a variable to the storage space of the segment named in its
// directive. The segment index is asserted to be in range.
StorageElement*
StorageMap::addSymbol(const BrigDirectiveVariable *sym, const BrigObject *obj)
{
    const BrigSegment8_t seg = sym->segment;

    assert(seg >= Brig::BRIG_SEGMENT_FLAT);
    assert(seg < NumSegments);

    StorageSpace *target = space[seg];
    return target->addSymbol(sym, obj);
}
|
||||
|
||||
int
|
||||
StorageMap::getSize(Brig::BrigSegment segment)
|
||||
{
|
||||
assert(segment > Brig::BRIG_SEGMENT_GLOBAL);
|
||||
assert(segment < NumSegments);
|
||||
|
||||
if (segment != Brig::BRIG_SEGMENT_GROUP &&
|
||||
segment != Brig::BRIG_SEGMENT_READONLY) {
|
||||
return space[segment]->getSize();
|
||||
} else {
|
||||
int ret = space[segment]->getSize();
|
||||
|
||||
if (outerScopeMap) {
|
||||
ret += outerScopeMap->getSize(segment);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
StorageMap::resetOffset(Brig::BrigSegment segment)
|
||||
{
|
||||
space[segment]->resetOffset();
|
||||
}
|
||||
|
||||
// Find a symbol by name in the given segment, searching this scope first
// and then each enclosing scope; nullptr if no scope has it.
StorageElement*
StorageMap::findSymbol(BrigSegment segment, std::string name)
{
    if (StorageElement *local = space[segment]->findSymbol(name))
        return local;

    return outerScopeMap ? outerScopeMap->findSymbol(segment, name) : nullptr;
}
|
||||
|
||||
// Find the symbol covering addr in the given segment, searching this scope
// first and then each enclosing scope; nullptr if none matches.
StorageElement*
StorageMap::findSymbol(Brig::BrigSegment segment, uint64_t addr)
{
    StorageSpace *seg_space = space[segment];

    // there is no memory in segment?
    if (!seg_space)
        return nullptr;

    if (StorageElement *local = seg_space->findSymbol(addr))
        return local;

    return outerScopeMap ? outerScopeMap->findSymbol(segment, addr) : nullptr;
}
|
||||
|
||||
// Find the symbol registered for a variable directive in the given
// segment, searching this scope first and then each enclosing scope;
// nullptr if none matches.
StorageElement*
StorageMap::findSymbol(Brig::BrigSegment segment,
                       const BrigDirectiveVariable *brigptr)
{
    StorageSpace *seg_space = space[segment];

    // there is no memory in segment?
    if (!seg_space)
        return nullptr;

    if (StorageElement *local = seg_space->findSymbol(brigptr))
        return local;

    return outerScopeMap ? outerScopeMap->findSymbol(segment, brigptr)
                         : nullptr;
}
|
||||
@@ -1,445 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt
|
||||
*/
|
||||
|
||||
#ifndef __HSAIL_CODE_HH__
|
||||
#define __HSAIL_CODE_HH__
|
||||
|
||||
#include <cassert>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "arch/gpu_decoder.hh"
|
||||
#include "arch/hsail/Brig.h"
|
||||
#include "base/addr_range_map.hh"
|
||||
#include "base/intmath.hh"
|
||||
#include "config/the_gpu_isa.hh"
|
||||
#include "gpu-compute/hsa_code.hh"
|
||||
#include "gpu-compute/hsa_kernel_info.hh"
|
||||
#include "gpu-compute/misc.hh"
|
||||
|
||||
class BrigObject;
|
||||
class GPUStaticInst;
|
||||
|
||||
// Count the set bits among the lowest sz bits of src.
inline int
popcount(uint64_t src, int sz)
{
    int ones = 0;

    for (int remaining = sz; remaining > 0; --remaining) {
        ones += static_cast<int>(src & 1);
        src >>= 1;
    }

    return ones;
}
|
||||
|
||||
// Index of the lowest set bit among the lowest sz bits of src, or sz when
// none of them is set (0 when sz is not positive).
inline int
firstbit(uint64_t src, int sz)
{
    int idx = 0;

    while (idx < sz && !(src & 1)) {
        src >>= 1;
        ++idx;
    }

    return idx;
}
|
||||
|
||||
// Index of the highest set bit among the lowest sz bits of src, or -1 when
// none of them is set.
inline int
lastbit(uint64_t src, int sz)
{
    int highest = -1;

    for (int pos = 0; pos < sz; ++pos, src >>= 1) {
        highest = (src & 1) ? pos : highest;
    }

    return highest;
}
|
||||
|
||||
/**
 * Locate the most significant bit that differs from the sign bit.
 *
 * src is treated as an sz-bit two's-complement value whose sign bit is bit
 * sz - 1. If the sign bit is set, the result is the index of the highest
 * clear bit among bits [0, sz - 2]; otherwise it is the index of the
 * highest set bit in that range.
 *
 * The sign bit is extracted by shifting src itself, so widths above 32
 * (where the former int-typed "1 << (sz - 1)" mask was undefined) are
 * handled correctly.
 *
 * @param src value to scan
 * @param sz operand width in bits
 * @return index of the highest bit that differs from the sign bit, or -1
 *         if there is none or sz is not positive
 */
inline int
signbit(uint64_t src, int sz)
{
    if (sz <= 0)
        return -1;

    // a sign position beyond bit 63 can never be set in a 64-bit value
    const bool negative = sz <= 64 && ((src >> (sz - 1)) & 1);

    int i0 = -1;
    for (int i = 0; i < sz - 1; ++i) {
        const bool bit = src & 1;
        if (bit != negative)
            i0 = i;
        src >>= 1;
    }

    return i0;
}
|
||||
|
||||
// Reverse the order of the lowest sz bits of src; bits above sz are dropped.
inline uint64_t
bitrev(uint64_t src, int sz)
{
    uint64_t reversed = 0;

    for (int remaining = sz; remaining > 0; --remaining) {
        // shift the result up and append the current low bit of src
        reversed = (reversed << 1) | (src & 1);
        src >>= 1;
    }

    return reversed;
}
|
||||
|
||||
// Upper 32 bits of the 64-bit product of two unsigned 32-bit values.
inline uint64_t
mul_hi(uint32_t a, uint32_t b)
{
    const uint64_t product = static_cast<uint64_t>(a) * b;
    return product >> 32;
}
|
||||
|
||||
// Upper 32 bits of the 64-bit product of two signed 32-bit values. The
// shifted signed result is converted to uint64_t on return.
inline uint64_t
mul_hi(int32_t a, int32_t b)
{
    const int64_t product = static_cast<int64_t>(a) * b;
    return product >> 32;
}
|
||||
|
||||
/**
 * Upper 64 bits of the 128-bit product of two unsigned 64-bit values.
 *
 * The previous version shifted the already truncated 64-bit product right
 * by 32, which is not the high half of the full product. Here the product
 * is built from 32-bit partial products so that no intermediate overflows
 * and no 128-bit integer type is required.
 */
inline uint64_t
mul_hi(uint64_t a, uint64_t b)
{
    const uint64_t low_mask = 0xffffffffULL;

    const uint64_t a_lo = a & low_mask;
    const uint64_t a_hi = a >> 32;
    const uint64_t b_lo = b & low_mask;
    const uint64_t b_hi = b >> 32;

    const uint64_t lo_lo = a_lo * b_lo;
    const uint64_t hi_lo = a_hi * b_lo;
    const uint64_t lo_hi = a_lo * b_hi;
    const uint64_t hi_hi = a_hi * b_hi;

    // contributions to bits [32, 95]; this sum cannot exceed 2^64 - 1
    const uint64_t cross = (lo_lo >> 32) + (hi_lo & low_mask) + lo_hi;

    return hi_hi + (hi_lo >> 32) + (cross >> 32);
}

/**
 * Upper 64 bits of the 128-bit product of two signed 64-bit values,
 * returned as its two's-complement bit pattern.
 *
 * Computed from the unsigned high half: reinterpreting a negative operand
 * as unsigned adds 2^64 times the other operand to the product, so that
 * operand is subtracted back out of the high half (modulo 2^64).
 */
inline uint64_t
mul_hi(int64_t a, int64_t b)
{
    uint64_t high = mul_hi(static_cast<uint64_t>(a),
                           static_cast<uint64_t>(b));

    if (a < 0)
        high -= static_cast<uint64_t>(b);
    if (b < 0)
        high -= static_cast<uint64_t>(a);

    return high;
}
|
||||
|
||||
// Floating-point overload: both arguments are ignored and the result is
// always 0.
inline uint64_t
mul_hi(double /* a */, double /* b */)
{
    return 0;
}
|
||||
|
||||
// A named label with an integer value. A value of -1 marks a label that
// has been referenced but not yet defined.
class Label
{
  public:
    std::string name;
    int value;

    Label() : value(-1) {}

    // true once define() has stored a value
    bool defined() { return value != -1; }

    // Adopt _name if this label is still unnamed; otherwise assert that
    // it matches the name already stored.
    void
    checkName(std::string &_name)
    {
        if (!name.empty()) {
            assert(name == _name);
            return;
        }

        name = _name;
    }

    // Give the label its value. The label must not be defined yet, and
    // _value must not be the reserved -1.
    void
    define(std::string &_name, int _value)
    {
        assert(!defined());
        assert(_value != -1);

        value = _value;
        checkName(_name);
    }

    // Value of a label that is asserted to be defined.
    int
    get()
    {
        assert(defined());
        return value;
    }
};
|
||||
|
||||
class LabelMap
|
||||
{
|
||||
std::map<std::string, Label> map;
|
||||
|
||||
public:
|
||||
LabelMap() { }
|
||||
|
||||
void addLabel(const Brig::BrigDirectiveLabel *lbl, int inst_index,
|
||||
const BrigObject *obj);
|
||||
|
||||
Label *refLabel(const Brig::BrigDirectiveLabel *lbl,
|
||||
const BrigObject *obj);
|
||||
};
|
||||
|
||||
// Number of per-segment storage spaces; segment values from
// BRIG_SEGMENT_AMD_GCN upward are therefore not tracked.
const int NumSegments = Brig::BRIG_SEGMENT_AMD_GCN;

// Segment name strings; the array is defined in another translation unit.
extern const char *segmentNames[];
|
||||
|
||||
class StorageElement
|
||||
{
|
||||
public:
|
||||
std::string name;
|
||||
uint64_t offset;
|
||||
|
||||
uint64_t size;
|
||||
const Brig::BrigDirectiveVariable *brigSymbol;
|
||||
StorageElement(const char *_name, uint64_t _offset, int _size,
|
||||
const Brig::BrigDirectiveVariable *sym)
|
||||
: name(_name), offset(_offset), size(_size), brigSymbol(sym)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
class StorageSpace
|
||||
{
|
||||
typedef std::map<const Brig::BrigDirectiveVariable*, StorageElement*>
|
||||
DirVarToSE_map;
|
||||
|
||||
std::list<StorageElement*> elements;
|
||||
AddrRangeMap<StorageElement*> elements_by_addr;
|
||||
DirVarToSE_map elements_by_brigptr;
|
||||
|
||||
uint64_t nextOffset;
|
||||
|
||||
public:
|
||||
StorageSpace(Brig::BrigSegment _class) : nextOffset(0)
|
||||
{
|
||||
}
|
||||
|
||||
StorageElement *addSymbol(const Brig::BrigDirectiveVariable *sym,
|
||||
const BrigObject *obj);
|
||||
|
||||
StorageElement* findSymbol(std::string name);
|
||||
StorageElement* findSymbol(uint64_t addr);
|
||||
StorageElement* findSymbol(const Brig::BrigDirectiveVariable *brigptr);
|
||||
|
||||
int getSize() { return nextOffset; }
|
||||
void resetOffset() { nextOffset = 0; }
|
||||
};
|
||||
|
||||
class StorageMap
|
||||
{
|
||||
StorageMap *outerScopeMap;
|
||||
StorageSpace *space[NumSegments];
|
||||
|
||||
public:
|
||||
StorageMap(StorageMap *outerScope = nullptr);
|
||||
|
||||
StorageElement *addSymbol(const Brig::BrigDirectiveVariable *sym,
|
||||
const BrigObject *obj);
|
||||
|
||||
StorageElement* findSymbol(Brig::BrigSegment segment, std::string name);
|
||||
StorageElement* findSymbol(Brig::BrigSegment segment, uint64_t addr);
|
||||
|
||||
StorageElement* findSymbol(Brig::BrigSegment segment,
|
||||
const Brig::BrigDirectiveVariable *brigptr);
|
||||
|
||||
// overloaded version to avoid casting
|
||||
StorageElement*
|
||||
findSymbol(Brig::BrigSegment8_t segment, std::string name)
|
||||
{
|
||||
return findSymbol((Brig::BrigSegment)segment, name);
|
||||
}
|
||||
|
||||
int getSize(Brig::BrigSegment segment);
|
||||
void resetOffset(Brig::BrigSegment segment);
|
||||
};
|
||||
|
||||
// Base data type tags. Enumerators are numbered consecutively from 0
// (BT_DEFAULT) to 13 (BT_NULL).
enum base_type_e
{
    BT_DEFAULT,
    // untyped 8-bit
    BT_B8,
    // unsigned integers
    BT_U8, BT_U16, BT_U32, BT_U64,
    // signed integers
    BT_S8, BT_S16, BT_S32, BT_S64,
    // floating point
    BT_F16, BT_F32, BT_F64,
    BT_NULL
};
|
||||
|
||||
/* @class HsailCode
|
||||
* the HsailCode class is used to store information
|
||||
* about HSA kernels stored in the BRIG format. it holds
|
||||
* all information about a kernel, function, or variable
|
||||
* symbol and provides methods for accessing that
|
||||
* information.
|
||||
*/
|
||||
|
||||
class HsailCode final : public HsaCode
|
||||
{
|
||||
public:
|
||||
TheGpuISA::Decoder decoder;
|
||||
|
||||
StorageMap *storageMap;
|
||||
LabelMap labelMap;
|
||||
uint32_t kernarg_start;
|
||||
uint32_t kernarg_end;
|
||||
int32_t private_size;
|
||||
|
||||
int32_t readonly_size;
|
||||
|
||||
// We track the maximum register index used for each register
|
||||
// class when we load the code so we can size the register files
|
||||
// appropriately (i.e., one more than the max index).
|
||||
uint32_t max_creg; // maximum c-register index
|
||||
uint32_t max_sreg; // maximum s-register index
|
||||
uint32_t max_dreg; // maximum d-register index
|
||||
|
||||
HsailCode(const std::string &name_str,
|
||||
const Brig::BrigDirectiveExecutable *code_dir,
|
||||
const BrigObject *obj,
|
||||
StorageMap *objStorageMap);
|
||||
|
||||
// this version is used to create a placeholder when
|
||||
// we encounter a kernel-related directive before the
|
||||
// kernel itself
|
||||
HsailCode(const std::string &name_str);
|
||||
|
||||
void init(const Brig::BrigDirectiveExecutable *code_dir,
|
||||
const BrigObject *obj, StorageMap *objStorageMap);
|
||||
|
||||
void
|
||||
generateHsaKernelInfo(HsaKernelInfo *hsaKernelInfo) const
|
||||
{
|
||||
hsaKernelInfo->sRegCount = max_sreg + 1;
|
||||
hsaKernelInfo->dRegCount = max_dreg + 1;
|
||||
hsaKernelInfo->cRegCount = max_creg + 1;
|
||||
|
||||
hsaKernelInfo->static_lds_size = getSize(Brig::BRIG_SEGMENT_GROUP);
|
||||
|
||||
hsaKernelInfo->private_mem_size =
|
||||
roundUp(getSize(Brig::BRIG_SEGMENT_PRIVATE), 8);
|
||||
|
||||
hsaKernelInfo->spill_mem_size =
|
||||
roundUp(getSize(Brig::BRIG_SEGMENT_SPILL), 8);
|
||||
}
|
||||
|
||||
int
|
||||
getSize(MemorySegment segment) const
|
||||
{
|
||||
Brig::BrigSegment brigSeg;
|
||||
|
||||
switch (segment) {
|
||||
case MemorySegment::NONE:
|
||||
brigSeg = Brig::BRIG_SEGMENT_NONE;
|
||||
break;
|
||||
case MemorySegment::FLAT:
|
||||
brigSeg = Brig::BRIG_SEGMENT_FLAT;
|
||||
break;
|
||||
case MemorySegment::GLOBAL:
|
||||
brigSeg = Brig::BRIG_SEGMENT_GLOBAL;
|
||||
break;
|
||||
case MemorySegment::READONLY:
|
||||
brigSeg = Brig::BRIG_SEGMENT_READONLY;
|
||||
break;
|
||||
case MemorySegment::KERNARG:
|
||||
brigSeg = Brig::BRIG_SEGMENT_KERNARG;
|
||||
break;
|
||||
case MemorySegment::GROUP:
|
||||
brigSeg = Brig::BRIG_SEGMENT_GROUP;
|
||||
break;
|
||||
case MemorySegment::PRIVATE:
|
||||
brigSeg = Brig::BRIG_SEGMENT_PRIVATE;
|
||||
break;
|
||||
case MemorySegment::SPILL:
|
||||
brigSeg = Brig::BRIG_SEGMENT_SPILL;
|
||||
break;
|
||||
case MemorySegment::ARG:
|
||||
brigSeg = Brig::BRIG_SEGMENT_ARG;
|
||||
break;
|
||||
case MemorySegment::EXTSPACE0:
|
||||
brigSeg = Brig::BRIG_SEGMENT_AMD_GCN;
|
||||
break;
|
||||
default:
|
||||
fatal("Unknown BrigSegment type.\n");
|
||||
}
|
||||
|
||||
return getSize(brigSeg);
|
||||
}
|
||||
|
||||
private:
|
||||
int
|
||||
getSize(Brig::BrigSegment segment) const
|
||||
{
|
||||
if (segment == Brig::BRIG_SEGMENT_PRIVATE) {
|
||||
// with the code generated by new HSA compiler the assertion
|
||||
// does not hold anymore..
|
||||
//assert(private_size != -1);
|
||||
return private_size;
|
||||
} else {
|
||||
return storageMap->getSize(segment);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
StorageElement*
|
||||
findSymbol(Brig::BrigSegment segment, uint64_t addr)
|
||||
{
|
||||
return storageMap->findSymbol(segment, addr);
|
||||
}
|
||||
|
||||
void
|
||||
setPrivateSize(int32_t _private_size)
|
||||
{
|
||||
private_size = _private_size;
|
||||
}
|
||||
|
||||
Label*
|
||||
refLabel(const Brig::BrigDirectiveLabel *lbl, const BrigObject *obj)
|
||||
{
|
||||
return labelMap.refLabel(lbl, obj);
|
||||
}
|
||||
};
|
||||
|
||||
#endif // __HSAIL_CODE_HH__
|
||||
@@ -1,295 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt
|
||||
*/
|
||||
|
||||
#include "gpu-compute/kernel_cfg.hh"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
#include "gpu-compute/gpu_static_inst.hh"
|
||||
|
||||
void
|
||||
ControlFlowInfo::assignImmediatePostDominators(
|
||||
const std::vector<GPUStaticInst*>& instructions)
|
||||
{
|
||||
ControlFlowInfo cfg(instructions);
|
||||
cfg.findImmediatePostDominators();
|
||||
}
|
||||
|
||||
|
||||
// Keep the instruction list, split it into basic blocks, then link the
// blocks to their successors.
ControlFlowInfo::ControlFlowInfo(const std::vector<GPUStaticInst*>& insts)
    : instructions(insts)
{
    createBasicBlocks();
    connectBasicBlocks();
}
|
||||
|
||||
// Return the basic block whose byte address range contains inst_addr, or
// nullptr when no block covers it.
BasicBlock*
ControlFlowInfo::basicBlock(int inst_addr) const
{
    for (auto& candidate : basicBlocks) {
        int start_addr = candidate->firstInstruction->instAddr();

        if (inst_addr < start_addr)
            continue;

        // a block spans size instructions of RawMachInst bytes each
        if (inst_addr <
            start_addr + candidate->size * sizeof(TheGpuISA::RawMachInst)) {
            return candidate.get();
        }
    }

    return nullptr;
}
|
||||
|
||||
|
||||
// Last instruction of a basic block; nullptr for the exit block.
GPUStaticInst*
ControlFlowInfo::lastInstruction(const BasicBlock* block) const
{
    if (block->isExit())
        return nullptr;

    // index of the block's first instruction plus its length, minus one
    return instructions.at(block->firstInstruction->instNum() +
                           block->size - 1);
}
|
||||
|
||||
// Block containing the address stored as the ipdInstNum of this block's
// last instruction; nullptr for the exit block.
BasicBlock*
ControlFlowInfo::postDominator(const BasicBlock* block) const
{
    if (block->isExit())
        return nullptr;

    GPUStaticInst *tail = lastInstruction(block);
    return basicBlock(tail->ipdInstNum());
}
|
||||
|
||||
void
|
||||
ControlFlowInfo::createBasicBlocks()
|
||||
{
|
||||
assert(!instructions.empty());
|
||||
std::set<int> leaders;
|
||||
// first instruction is a leader
|
||||
leaders.insert(0);
|
||||
for (const auto &instruction : instructions) {
|
||||
if (instruction->isBranch()) {
|
||||
const int target_pc = instruction->getTargetPc();
|
||||
leaders.insert(target_pc);
|
||||
leaders.insert(instruction->nextInstAddr());
|
||||
}
|
||||
}
|
||||
|
||||
size_t block_size = 0;
|
||||
for (const auto &instruction : instructions) {
|
||||
if (leaders.find(instruction->instAddr()) != leaders.end()) {
|
||||
uint32_t id = basicBlocks.size();
|
||||
if (id > 0) {
|
||||
basicBlocks.back()->size = block_size;
|
||||
}
|
||||
block_size = 0;
|
||||
basicBlocks.emplace_back(new BasicBlock(id, instruction));
|
||||
}
|
||||
block_size++;
|
||||
}
|
||||
basicBlocks.back()->size = block_size;
|
||||
// exit basic block
|
||||
basicBlocks.emplace_back(new BasicBlock(basicBlocks.size(), nullptr));
|
||||
}
|
||||
|
||||
void
|
||||
ControlFlowInfo::connectBasicBlocks()
|
||||
{
|
||||
BasicBlock* exit_bb = basicBlocks.back().get();
|
||||
for (auto& bb : basicBlocks) {
|
||||
if (bb->isExit()) {
|
||||
break;
|
||||
}
|
||||
GPUStaticInst* last = lastInstruction(bb.get());
|
||||
if (last->isReturn()) {
|
||||
bb->successorIds.insert(exit_bb->id);
|
||||
continue;
|
||||
}
|
||||
if (last->isBranch()) {
|
||||
const uint32_t target_pc = last->getTargetPc();
|
||||
BasicBlock* target_bb = basicBlock(target_pc);
|
||||
bb->successorIds.insert(target_bb->id);
|
||||
}
|
||||
|
||||
// Unconditional jump instructions have a unique successor
|
||||
if (!last->isUnconditionalJump()) {
|
||||
BasicBlock* next_bb = basicBlock(last->nextInstAddr());
|
||||
bb->successorIds.insert(next_bb->id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// In-place set intersection: remove from a every element that is not also
// in b.
static void
intersect(std::set<uint32_t>& a, const std::set<uint32_t>& b)
{
    for (std::set<uint32_t>::iterator cur = a.begin(); cur != a.end(); ) {
        if (b.find(*cur) == b.end()) {
            // erase() hands back the iterator following the removed one
            cur = a.erase(cur);
        } else {
            ++cur;
        }
    }
}
|
||||
|
||||
|
||||
void
|
||||
ControlFlowInfo::findPostDominators()
|
||||
{
|
||||
// the only postdominator of the exit block is itself
|
||||
basicBlocks.back()->postDominatorIds.insert(basicBlocks.back()->id);
|
||||
//copy all basic blocks to all postdominator lists except for exit block
|
||||
for (auto& block : basicBlocks) {
|
||||
if (!block->isExit()) {
|
||||
for (uint32_t i = 0; i < basicBlocks.size(); i++) {
|
||||
block->postDominatorIds.insert(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool change = true;
|
||||
while (change) {
|
||||
change = false;
|
||||
for (int h = basicBlocks.size() - 2; h >= 0; --h) {
|
||||
size_t num_postdominators =
|
||||
basicBlocks[h]->postDominatorIds.size();
|
||||
for (int s : basicBlocks[h]->successorIds) {
|
||||
intersect(basicBlocks[h]->postDominatorIds,
|
||||
basicBlocks[s]->postDominatorIds);
|
||||
}
|
||||
basicBlocks[h]->postDominatorIds.insert(h);
|
||||
change |= (num_postdominators
|
||||
!= basicBlocks[h]->postDominatorIds.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// In-place set difference: remove from a every element of b, except that
// the value given as exception is never removed.
static void
setDifference(std::set<uint32_t>&a,
              const std::set<uint32_t>& b, uint32_t exception)
{
    for (std::set<uint32_t>::const_iterator cur = b.begin();
         cur != b.end(); ++cur) {
        if (*cur == exception)
            continue;

        a.erase(*cur);
    }
}
|
||||
|
||||
void
|
||||
ControlFlowInfo::findImmediatePostDominators()
|
||||
{
|
||||
assert(basicBlocks.size() > 1); // Entry and exit blocks must be present
|
||||
|
||||
findPostDominators();
|
||||
|
||||
for (auto& basicBlock : basicBlocks) {
|
||||
if (basicBlock->isExit()) {
|
||||
continue;
|
||||
}
|
||||
std::set<uint32_t> candidates = basicBlock->postDominatorIds;
|
||||
candidates.erase(basicBlock->id);
|
||||
for (uint32_t postDominatorId : basicBlock->postDominatorIds) {
|
||||
if (postDominatorId != basicBlock->id) {
|
||||
setDifference(candidates,
|
||||
basicBlocks[postDominatorId]->postDominatorIds,
|
||||
postDominatorId);
|
||||
}
|
||||
}
|
||||
assert(candidates.size() == 1);
|
||||
GPUStaticInst* last_instruction = lastInstruction(basicBlock.get());
|
||||
BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get();
|
||||
if (!ipd_block->isExit()) {
|
||||
GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction;
|
||||
last_instruction->ipdInstNum(ipd_first_inst->instAddr());
|
||||
} else {
|
||||
last_instruction->ipdInstNum(last_instruction->nextInstAddr());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ControlFlowInfo::printPostDominators() const
|
||||
{
|
||||
for (auto& block : basicBlocks) {
|
||||
std::cout << "PD(" << block->id << ") = {";
|
||||
std::copy(block->postDominatorIds.begin(),
|
||||
block->postDominatorIds.end(),
|
||||
std::ostream_iterator<uint32_t>(std::cout, ", "));
|
||||
std::cout << "}" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ControlFlowInfo::printImmediatePostDominators() const
|
||||
{
|
||||
for (const auto& block : basicBlocks) {
|
||||
if (block->isExit()) {
|
||||
continue;
|
||||
}
|
||||
std::cout << "IPD(" << block->id << ") = ";
|
||||
std::cout << postDominator(block.get())->id << ", ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
void
|
||||
ControlFlowInfo::printBasicBlocks() const
|
||||
{
|
||||
for (GPUStaticInst* inst : instructions) {
|
||||
int inst_addr = inst->instAddr();
|
||||
std::cout << inst_addr << " [" << basicBlock(inst_addr)->id
|
||||
<< "]: " << inst->disassemble();
|
||||
if (inst->isBranch()) {
|
||||
std::cout << ", PC = " << inst->getTargetPc();
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ControlFlowInfo::printBasicBlockDot() const
|
||||
{
|
||||
printf("digraph {\n");
|
||||
for (const auto& basic_block : basicBlocks) {
|
||||
printf("\t");
|
||||
for (uint32_t successorId : basic_block->successorIds) {
|
||||
printf("%d -> %d; ", basic_block->id, successorId);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("}\n");
|
||||
}
|
||||
@@ -1,133 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt
|
||||
*/
|
||||
|
||||
#ifndef __KERNEL_CFG_HH__
|
||||
#define __KERNEL_CFG_HH__
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
|
||||
class GPUStaticInst;
class HsailCode;

/**
 * One node of a kernel's control-flow graph. Blocks refer to each other
 * by numeric id rather than by pointer.
 */
struct BasicBlock
{
    /**
     * Create an empty block (size 0) with identifier @p num whose first
     * instruction is @p begin.
     */
    BasicBlock(uint32_t num, GPUStaticInst* begin)
        : id(num), size(0), firstInstruction(begin)
    {
    }

    /** A block is the entry block when its id is zero. */
    bool
    isEntry() const
    {
        return id == 0;
    }

    /** A block is an exit block when it holds no instructions. */
    bool
    isExit() const
    {
        return size == 0;
    }

    /** Identifier of the block, unique within a given kernel. */
    const uint32_t id;

    /** How many instructions the block contains. */
    size_t size;

    /** First instruction of the block. */
    GPUStaticInst* firstInstruction;

    /** Ids of the blocks that follow (are reachable from) this block. */
    std::set<uint32_t> successorIds;

    /** Ids of the blocks that will be visited from this block. */
    std::set<uint32_t> postDominatorIds;
};
|
||||
|
||||
class ControlFlowInfo
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Compute immediate post-dominator instruction for kernel instructions.
|
||||
*/
|
||||
static void assignImmediatePostDominators(
|
||||
const std::vector<GPUStaticInst*>& instructions);
|
||||
|
||||
private:
|
||||
ControlFlowInfo(const std::vector<GPUStaticInst*>& instructions);
|
||||
|
||||
GPUStaticInst* lastInstruction(const BasicBlock* block) const;
|
||||
|
||||
BasicBlock* basicBlock(int inst_addr) const;
|
||||
|
||||
BasicBlock* postDominator(const BasicBlock* block) const;
|
||||
|
||||
void createBasicBlocks();
|
||||
|
||||
void connectBasicBlocks();
|
||||
|
||||
void findPostDominators();
|
||||
|
||||
void findImmediatePostDominators();
|
||||
|
||||
void printBasicBlocks() const;
|
||||
|
||||
void printBasicBlockDot() const;
|
||||
|
||||
void printPostDominators() const;
|
||||
|
||||
void printImmediatePostDominators() const;
|
||||
|
||||
std::vector<std::unique_ptr<BasicBlock>> basicBlocks;
|
||||
std::vector<GPUStaticInst*> instructions;
|
||||
};
|
||||
|
||||
#endif // __KERNEL_CFG_HH__
|
||||
@@ -1,70 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Steve Reinhardt
|
||||
*/
|
||||
|
||||
#ifndef __NDRANGE_HH__
|
||||
#define __NDRANGE_HH__
|
||||
|
||||
#include "base/types.hh"
|
||||
#include "gpu-compute/qstruct.hh"
|
||||
|
||||
struct NDRange
|
||||
{
|
||||
// copy of the queue entry provided at dispatch
|
||||
HsaQueueEntry q;
|
||||
|
||||
// The current workgroup id (3 dimensions)
|
||||
int wgId[3];
|
||||
// The number of workgroups in each dimension
|
||||
int numWg[3];
|
||||
// The total number of workgroups
|
||||
int numWgTotal;
|
||||
|
||||
// The number of completed work groups
|
||||
int numWgCompleted;
|
||||
// The global workgroup ID
|
||||
uint32_t globalWgId;
|
||||
|
||||
// flag indicating whether all work groups have been launched
|
||||
bool wg_disp_rem;
|
||||
// kernel complete
|
||||
bool execDone;
|
||||
bool userDoorBellSet;
|
||||
volatile bool *addrToNotify;
|
||||
volatile uint32_t *numDispLeft;
|
||||
int dispatchId;
|
||||
int curCid; // Current context id
|
||||
};
|
||||
|
||||
#endif // __NDRANGE_HH__
|
||||
@@ -1,148 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Brad Beckmann, Marc Orr
|
||||
*/
|
||||
|
||||
#ifndef __Q_STRUCT_HH__
|
||||
#define __Q_STRUCT_HH__
|
||||
|
||||
#include <bitset>
|
||||
#include <cstdint>
|
||||
|
||||
// Upper bound on the number of kernel arguments.
static const int KER_NUM_ARGS = 32;
// Size of the kernel argument buffer.
static const int KER_ARGS_LENGTH = 512;

class LdsChunk;
struct NDRange;

/**
 * One entry of an HSA queue.
 *
 * Take great care with alignment here: the structure has to compile to
 * the same layout in 32-bit and 64-bit mode. Do not reorder, resize or
 * insert fields without re-checking that.
 */
struct HsaQueueEntry
{
    // Base pointer of the array of instruction pointers.
    uint64_t code_ptr;
    // Grid size, one entry per dimension.
    uint32_t gdSize[3];
    // Workgroup size, one entry per dimension.
    uint32_t wgSize[3];
    uint16_t sRegCount;
    uint16_t dRegCount;
    uint16_t cRegCount;
    uint64_t privMemStart;
    uint32_t privMemPerItem;
    uint32_t privMemTotal;
    uint64_t spillMemStart;
    uint32_t spillMemPerItem;
    uint32_t spillMemTotal;
    uint64_t roMemStart;
    uint32_t roMemTotal;
    // LDS size in bytes.
    uint32_t ldsSize;
    // Virtual memory id (currently unused).
    uint32_t vmId;

    // Pointer to the dependency chain (currently unused).
    uint64_t depends;

    // Holds a pointer to bool.
    uint64_t addrToNotify;
    // Holds a pointer to uint32_t.
    uint64_t numDispLeft;

    // Argument passing for standalone mode. These go away once run.py
    // and sh.cpp have been updated to use the args and offsets arrays.
    uint64_t arg1;
    uint64_t arg2;
    uint64_t arg3;
    uint64_t arg4;

    // Argument passing for cpu+gpu mode.
    uint8_t args[KER_ARGS_LENGTH];
    uint16_t offsets[KER_NUM_ARGS];
    uint16_t num_args;
};
|
||||
|
||||
/**
 * State exchanged between the simulation and the simulated application.
 * A pointer to one of these can travel in the `depends` field of an
 * HsaQueueEntry.
 */
struct HostState
{
    // cl_event*; it has the original HsaQueueEntry for init.
    uint64_t event;
};
|
||||
|
||||
// Total number of HSA queues
static const int HSAQ_NQUEUES = 8;

// These values will eventually live in memory mapped registers
// and be settable by the kernel mode driver.

// Number of entries in each HSA queue
static const int HSAQ_SIZE = 64;
// Address of first HSA queue index
static const int HSAQ_INDX_BASE = 0x10000ll;
// Address of first HSA queue
static const int HSAQ_BASE = 0x11000ll;
// Suggested start of HSA code
static const int HSA_CODE_BASE = 0x18000ll;

// Shortcuts for deriving the address of a specific HSA queue, queue
// entry or queue index.
//
//   HSAQ(n)     address of queue n
//   HSAQE(n,i)  address of entry i of queue n
//   HSAQ_RI(n), HSAQ_WI(n), HSAQ_CI(n)
//               address of index slot 0, 1 and 2 of queue n; each queue
//               has three consecutive int-sized slots starting at
//               HSAQ_INDX_BASE
//
// Every macro argument is parenthesized so that an expression argument
// such as HSAQ_RI(q + 1) is scaled as a whole. Queue entries are sized as
// HsaQueueEntry, the entry structure declared in this header.
#define HSAQ(n) (HSAQ_BASE + HSAQ_SIZE * sizeof(HsaQueueEntry) * (n))
#define HSAQE(n,i) \
    (HSAQ_BASE + (HSAQ_SIZE * (n) + (i)) * sizeof(HsaQueueEntry))
#define HSAQ_RI(n) (HSAQ_INDX_BASE + sizeof(int) * ((n) * 3 + 0))
#define HSAQ_WI(n) (HSAQ_INDX_BASE + sizeof(int) * ((n) * 3 + 1))
#define HSAQ_CI(n) (HSAQ_INDX_BASE + sizeof(int) * ((n) * 3 + 2))
|
||||
|
||||
/*
|
||||
* Example code for writing to a queue
|
||||
*
|
||||
* void
|
||||
* ToQueue(int n, HsaQueueEntry *val)
|
||||
* {
|
||||
* int wi = *(int*)HSAQ_WI(n);
|
||||
* int ri = *(int*)HSAQ_RI(n);
|
||||
* int ci = *(int*)HSAQ_CI(n);
|
||||
*
|
||||
* if (ci - ri < HSAQ_SIZE) {
|
||||
* (*(int*)HSAQ_CI(n))++;
|
||||
* *(HsaQueueEntry*)(HSAQE(n, (wi % HSAQ_SIZE))) = *val;
|
||||
* (*(int*)HSAQ_WI(n))++;
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
|
||||
#endif // __Q_STRUCT_HH__
|
||||
@@ -1,71 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: John Kalamatianos
|
||||
*/
|
||||
|
||||
#include "gpu-compute/vector_register_state.hh"
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "gpu-compute/compute_unit.hh"
|
||||
|
||||
/**
 * Build an unattached register state: no parent compute unit, an empty
 * name and empty 32-bit and 64-bit register files. setParent() and init()
 * fill these in.
 */
VecRegisterState::VecRegisterState()
    : computeUnit(nullptr), _name(), s_reg(), d_reg()
{
}
|
||||
|
||||
void
|
||||
VecRegisterState::setParent(ComputeUnit *_computeUnit)
|
||||
{
|
||||
computeUnit = _computeUnit;
|
||||
_name = computeUnit->name() + ".VecRegState";
|
||||
}
|
||||
|
||||
void
|
||||
VecRegisterState::init(uint32_t _size, uint32_t wf_size)
|
||||
{
|
||||
s_reg.resize(_size);
|
||||
fatal_if(wf_size > std::numeric_limits<unsigned long long>::digits ||
|
||||
wf_size <= 0,
|
||||
"WF size is larger than the host can support or is zero");
|
||||
fatal_if((wf_size & (wf_size - 1)) != 0,
|
||||
"Wavefront size should be a power of 2");
|
||||
for (int i = 0; i < s_reg.size(); ++i) {
|
||||
s_reg[i].resize(wf_size, 0);
|
||||
}
|
||||
d_reg.resize(_size);
|
||||
for (int i = 0; i < d_reg.size(); ++i) {
|
||||
d_reg[i].resize(wf_size, 0);
|
||||
}
|
||||
}
|
||||
@@ -1,101 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: John Kalamatianos
|
||||
*/
|
||||
|
||||
#ifndef __VECTOR_REGISTER_STATE_HH__
|
||||
#define __VECTOR_REGISTER_STATE_HH__
|
||||
|
||||
#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

#include "gpu-compute/misc.hh"
|
||||
|
||||
class ComputeUnit;
|
||||
|
||||
// Vector Register State per SIMD unit (contents of the vector
|
||||
// registers in the VRF of the SIMD)
|
||||
class VecRegisterState
|
||||
{
|
||||
public:
|
||||
VecRegisterState();
|
||||
void init(uint32_t _size, uint32_t wf_size);
|
||||
|
||||
const std::string& name() const { return _name; }
|
||||
void setParent(ComputeUnit *_computeUnit);
|
||||
void regStats() { }
|
||||
|
||||
// Access methods
|
||||
template<typename T>
|
||||
T
|
||||
read(int regIdx, int threadId=0) {
|
||||
T *p0;
|
||||
assert(sizeof(T) == 4 || sizeof(T) == 8);
|
||||
if (sizeof(T) == 4) {
|
||||
p0 = (T*)(&s_reg[regIdx][threadId]);
|
||||
} else {
|
||||
p0 = (T*)(&d_reg[regIdx][threadId]);
|
||||
}
|
||||
|
||||
return *p0;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
write(unsigned int regIdx, T value, int threadId=0) {
|
||||
T *p0;
|
||||
assert(sizeof(T) == 4 || sizeof(T) == 8);
|
||||
if (sizeof(T) == 4) {
|
||||
p0 = (T*)(&s_reg[regIdx][threadId]);
|
||||
} else {
|
||||
p0 = (T*)(&d_reg[regIdx][threadId]);
|
||||
}
|
||||
|
||||
*p0 = value;
|
||||
}
|
||||
|
||||
// (Single Precision) Vector Register File size.
|
||||
int regSize() { return s_reg.size(); }
|
||||
|
||||
private:
|
||||
ComputeUnit *computeUnit;
|
||||
std::string _name;
|
||||
// 32-bit Single Precision Vector Register State
|
||||
std::vector<std::vector<uint32_t>> s_reg;
|
||||
// 64-bit Double Precision Vector Register State
|
||||
std::vector<std::vector<uint64_t>> d_reg;
|
||||
};
|
||||
|
||||
#endif // __VECTOR_REGISTER_STATE_HH__
|
||||
@@ -88,7 +88,7 @@ def _validateTags(commit_header):
|
||||
# @todo this is error prone, and should be extracted automatically from
|
||||
# a file
|
||||
|
||||
valid_tags = ["arch", "arch-arm", "arch-gcn3", "arch-hsail",
|
||||
valid_tags = ["arch", "arch-arm", "arch-gcn3",
|
||||
"arch-mips", "arch-power", "arch-riscv", "arch-sparc", "arch-x86",
|
||||
"base", "configs", "cpu", "cpu-kvm", "cpu-minor", "cpu-o3",
|
||||
"cpu-simple", "dev", "dev-arm", "dev-virtio", "ext", "fastmodel",
|
||||
|
||||
@@ -49,7 +49,7 @@ add_option('--builds',
|
||||
'SPARC,' \
|
||||
'X86,X86_MESI_Two_Level,' \
|
||||
'RISCV,' \
|
||||
'HSAIL_X86',
|
||||
'GCN3_X86',
|
||||
help="comma-separated build targets to test (default: '%default')")
|
||||
add_option('--modes',
|
||||
default='se,fs',
|
||||
|
||||
Reference in New Issue
Block a user