arch-vega: Add Vega ISA as a copy of GCN3

This changeset adds Vega support as a copy of GCN3.
Configs have been modified to include both ISAs.
The current implementation is not yet complete and needs
further modifications to fully comply with the ISA manual:

https://developer.amd.com/wp-content/resources/Vega_Shader_ISA_28July2017.pdf

Change-Id: I608aa6747a45594f8e1bd7802da1883cf612168b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42204
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Author: Kyle Roarty
Authored: 2019-06-27 12:22:29 -04:00
Committed by: Matt Sinclair
Commit: f7d4ff6ef5 (parent: c7ee47efc9)
20 changed files with 144242 additions and 1 deletion


@@ -68,6 +68,12 @@ arch-gcn3:
- Matt Poremba <matthew.poremba@amd.com>
- Matt Sinclair <sinclair@cs.wisc.edu>
arch-vega:
status: maintained
maintainers:
- Matt Poremba <matthew.poremba@amd.com>
- Matt Sinclair <sinclair@cs.wisc.edu>
arch-mips:
status: orphaned


@@ -67,7 +67,7 @@ env.SwitchingHeaders(
'''),
env.subst('${TARGET_ISA}'))
amdgpu_isa = ['gcn3']
amdgpu_isa = ['gcn3', 'vega']
env.SwitchingHeaders(
Split('''


@@ -0,0 +1,45 @@
# -*- mode:python -*-
# Copyright (c) 2021 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import sys
Import('*')
if env['TARGET_GPU_ISA'] == 'vega':
Source('decoder.cc')
Source('insts/gpu_static_inst.cc')
Source('insts/instructions.cc')
Source('insts/op_encodings.cc')
Source('isa.cc')
Source('registers.cc')
DebugFlag('VEGA', 'Debug flag for VEGA GPU ISA')


@@ -0,0 +1,36 @@
# -*- mode:python -*-
# Copyright (c) 2021 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
Import('*')
all_gpu_isa_list.append('vega')

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,103 @@
/*
* Copyright (c) 2016-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARCH_VEGA_GPU_ISA_HH__
#define __ARCH_VEGA_GPU_ISA_HH__
#include <array>
#include <type_traits>
#include "arch/amdgpu/vega/gpu_registers.hh"
#include "gpu-compute/dispatcher.hh"
#include "gpu-compute/hsa_queue_entry.hh"
#include "gpu-compute/misc.hh"
class Wavefront;
namespace VegaISA
{
class GPUISA
{
public:
GPUISA(Wavefront &wf);
template<typename T> T
readConstVal(int opIdx) const
{
panic_if(!std::is_integral<T>::value, "Constant values must "
"be an integer.\n");
T val(0);
if (isPosConstVal(opIdx)) {
val = (T)readPosConstReg(opIdx);
}
if (isNegConstVal(opIdx)) {
val = (T)readNegConstReg(opIdx);
}
return val;
}
ScalarRegU32 readMiscReg(int opIdx) const;
void writeMiscReg(int opIdx, ScalarRegU32 operandVal);
bool hasScalarUnit() const { return true; }
void advancePC(GPUDynInstPtr gpuDynInst);
private:
ScalarRegU32 readPosConstReg(int opIdx) const
{
return posConstRegs[opIdx - REG_INT_CONST_POS_MIN];
}
ScalarRegI32 readNegConstReg(int opIdx) const
{
return negConstRegs[opIdx - REG_INT_CONST_NEG_MIN];
}
static const std::array<const ScalarRegU32, NumPosConstRegs>
posConstRegs;
static const std::array<const ScalarRegI32, NumNegConstRegs>
negConstRegs;
// parent wavefront
Wavefront &wavefront;
// shader status bits
StatusReg statusReg;
// memory descriptor reg
ScalarRegU32 m0;
};
} // namespace VegaISA
#endif // __ARCH_VEGA_GPU_ISA_HH__
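
For readers tracing the inline-constant plumbing above: readConstVal() indexes the static posConstRegs/negConstRegs tables by op selector. The standalone sketch below is not part of this changeset; it assumes those tables hold the values 1..64 and -1..-16 implied by the op-selector comments in gpu_registers.hh, and the hypothetical readPosConst/readNegConst stand in for the private readPosConstReg/readNegConstReg accessors.

    #include <cassert>
    #include <cstdint>

    // Op-selector bounds copied from the VegaISA::OpSelector enum.
    constexpr int REG_INT_CONST_POS_MIN = 129;
    constexpr int REG_INT_CONST_NEG_MIN = 193;

    // Hypothetical stand-ins for readPosConstReg()/readNegConstReg():
    // selectors 129-192 encode the constants 1..64, and selectors
    // 193-208 encode the constants -1..-16.
    uint32_t readPosConst(int opIdx) { return opIdx - REG_INT_CONST_POS_MIN + 1; }
    int32_t readNegConst(int opIdx) { return -(opIdx - REG_INT_CONST_NEG_MIN + 1); }

    int main()
    {
        assert(readPosConst(129) == 1);   // REG_INT_CONST_POS_MIN
        assert(readPosConst(192) == 64);  // REG_INT_CONST_POS_MAX
        assert(readNegConst(193) == -1);  // REG_INT_CONST_NEG_MIN
        assert(readNegConst(208) == -16); // REG_INT_CONST_NEG_MAX
        return 0;
    }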


@@ -0,0 +1,186 @@
/*
* Copyright (c) 2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARCH_VEGA_GPU_MEM_HELPERS_HH__
#define __ARCH_VEGA_GPU_MEM_HELPERS_HH__
#include "arch/amdgpu/vega/insts/gpu_static_inst.hh"
#include "arch/amdgpu/vega/insts/op_encodings.hh"
#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
/**
* Helper function for instructions declared in op_encodings. This function
* takes in all of the arguments for a given memory request we are trying to
* initialize, then submits one or two requests depending on whether the
* original request is aligned or unaligned.
*/
template<typename T, int N>
inline void
initMemReqHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type,
bool is_atomic=false)
{
// local variables
int req_size = N * sizeof(T);
int block_size = gpuDynInst->computeUnit()->cacheLineSize();
Addr vaddr = 0, split_addr = 0;
bool misaligned_acc = false;
RequestPtr req = nullptr, req1 = nullptr, req2 = nullptr;
PacketPtr pkt = nullptr, pkt1 = nullptr, pkt2 = nullptr;
gpuDynInst->resetEntireStatusVector();
for (int lane = 0; lane < VegaISA::NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vaddr = gpuDynInst->addr[lane];
/**
* the base address of the cache line where the last
* byte of the request will be stored.
*/
split_addr = roundDown(vaddr + req_size - 1, block_size);
assert(split_addr <= vaddr || split_addr - vaddr < block_size);
/**
* if the base cache line address of the last byte is
* greater than the address of the first byte then we have
* a misaligned access.
*/
misaligned_acc = split_addr > vaddr;
if (is_atomic) {
// make sure request is word aligned
assert((vaddr & 0x3) == 0);
// a given lane's atomic can't cross cache lines
assert(!misaligned_acc);
req = std::make_shared<Request>(0, vaddr, sizeof(T), 0,
gpuDynInst->computeUnit()->masterId(), 0,
gpuDynInst->wfDynId,
gpuDynInst->makeAtomicOpFunctor<T>(
&(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
&(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]));
} else {
req = std::make_shared<Request>(0, vaddr, req_size, 0,
gpuDynInst->computeUnit()->masterId(), 0,
gpuDynInst->wfDynId);
}
if (misaligned_acc) {
gpuDynInst->setStatusVector(lane, 2);
req->splitOnVaddr(split_addr, req1, req2);
gpuDynInst->setRequestFlags(req1);
gpuDynInst->setRequestFlags(req2);
pkt1 = new Packet(req1, mem_req_type);
pkt2 = new Packet(req2, mem_req_type);
pkt1->dataStatic(&(reinterpret_cast<T*>(
gpuDynInst->d_data))[lane * N]);
pkt2->dataStatic(&(reinterpret_cast<T*>(
gpuDynInst->d_data))[lane * N + req1->getSize()]);
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory "
"request for %#x\n", gpuDynInst->cu_id,
gpuDynInst->simdId, gpuDynInst->wfSlotId, lane,
split_addr);
gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1);
gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2);
} else {
gpuDynInst->setStatusVector(lane, 1);
gpuDynInst->setRequestFlags(req);
pkt = new Packet(req, mem_req_type);
pkt->dataStatic(&(reinterpret_cast<T*>(
gpuDynInst->d_data))[lane * N]);
gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt);
}
} else { // if lane is not active, then no pending requests
gpuDynInst->setStatusVector(lane, 0);
}
}
}
/**
* Helper function for scalar instructions declared in op_encodings. This
* function takes in all of the arguments for a given memory request we are
* trying to initialize, then submits one or two requests depending on whether
* the original request is aligned or unaligned.
*/
template<typename T, int N>
inline void
initMemReqScalarHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type)
{
int req_size = N * sizeof(T);
int block_size = gpuDynInst->computeUnit()->cacheLineSize();
Addr vaddr = gpuDynInst->scalarAddr;
/**
* the base address of the cache line where the last byte of
* the request will be stored.
*/
Addr split_addr = roundDown(vaddr + req_size - 1, block_size);
assert(split_addr <= vaddr || split_addr - vaddr < block_size);
/**
* if the base cache line address of the last byte is greater
* than the address of the first byte then we have a misaligned
* access.
*/
bool misaligned_acc = split_addr > vaddr;
RequestPtr req = std::make_shared<Request>(0, vaddr, req_size, 0,
gpuDynInst->computeUnit()->masterId(), 0,
gpuDynInst->wfDynId);
if (misaligned_acc) {
RequestPtr req1, req2;
req->splitOnVaddr(split_addr, req1, req2);
gpuDynInst->numScalarReqs = 2;
gpuDynInst->setRequestFlags(req1);
gpuDynInst->setRequestFlags(req2);
PacketPtr pkt1 = new Packet(req1, mem_req_type);
PacketPtr pkt2 = new Packet(req2, mem_req_type);
pkt1->dataStatic(gpuDynInst->scalar_data);
pkt2->dataStatic(gpuDynInst->scalar_data + req1->getSize());
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: unaligned scalar memory request for"
" %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId,
gpuDynInst->wfSlotId, split_addr);
gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
} else {
gpuDynInst->numScalarReqs = 1;
gpuDynInst->setRequestFlags(req);
PacketPtr pkt = new Packet(req, mem_req_type);
pkt->dataStatic(gpuDynInst->scalar_data);
gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);
}
}
#endif // __ARCH_VEGA_GPU_MEM_HELPERS_HH__
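
Both helpers hinge on the same split-address test: round the address of the request's last byte down to a cache-line boundary and compare it against the first byte. A minimal standalone sketch of that test, with a local roundDown standing in for the one from gem5's base/intmath.hh and an assumed 64-byte line size:

    #include <cassert>
    #include <cstdint>

    using Addr = uint64_t;

    // Local stand-in for gem5's roundDown(): round addr down to a
    // multiple of align (a power of two here).
    static Addr roundDown(Addr addr, Addr align) { return addr & ~(align - 1); }

    int main()
    {
        const Addr block_size = 64; // assumed cache line size
        const int req_size = 16;    // e.g., N = 4 DWORDs of 4 bytes each

        // Aligned case: first and last byte share a cache line, so one
        // request suffices.
        Addr vaddr = 0x1000;
        Addr split_addr = roundDown(vaddr + req_size - 1, block_size);
        assert(!(split_addr > vaddr)); // not misaligned

        // Straddling case: the helper would split the request in two at
        // split_addr, covering [0x1038, 0x1040) and [0x1040, 0x1048).
        vaddr = 0x1038;
        split_addr = roundDown(vaddr + req_size - 1, block_size);
        assert(split_addr == 0x1040 && split_addr > vaddr);
        return 0;
    }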


@@ -0,0 +1,256 @@
/*
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARCH_VEGA_REGISTERS_HH__
#define __ARCH_VEGA_REGISTERS_HH__
#include <array>
#include <cstdint>
#include <string>
#include "arch/generic/vec_reg.hh"
#include "base/intmath.hh"
#include "base/logging.hh"
namespace VegaISA
{
enum OpSelector : int
{
REG_SGPR_MIN = 0,
REG_SGPR_MAX = 101,
REG_FLAT_SCRATCH_LO = 102,
REG_FLAT_SCRATCH_HI = 103,
REG_XNACK_MASK_LO = 104,
REG_XNACK_MASK_HI = 105,
REG_VCC_LO = 106,
REG_VCC_HI = 107,
REG_TBA_LO = 108,
REG_TBA_HI = 109,
REG_TMA_LO = 110,
REG_TMA_HI = 111,
REG_TTMP_0 = 112,
REG_TTMP_1 = 113,
REG_TTMP_2 = 114,
REG_TTMP_3 = 115,
REG_TTMP_4 = 116,
REG_TTMP_5 = 117,
REG_TTMP_6 = 118,
REG_TTMP_7 = 119,
REG_TTMP_8 = 120,
REG_TTMP_9 = 121,
REG_TTMP_10 = 122,
REG_TTMP_11 = 123,
REG_M0 = 124,
REG_RESERVED_1 = 125,
REG_EXEC_LO = 126,
REG_EXEC_HI = 127,
REG_ZERO = 128,
REG_INT_CONST_POS_MIN = 129,
REG_INT_CONST_POS_MAX = 192,
REG_INT_CONST_NEG_MIN = 193,
REG_INT_CONST_NEG_MAX = 208,
REG_RESERVED_2 = 209,
REG_RESERVED_3 = 210,
REG_RESERVED_4 = 211,
REG_RESERVED_5 = 212,
REG_RESERVED_6 = 213,
REG_RESERVED_7 = 214,
REG_RESERVED_8 = 215,
REG_RESERVED_9 = 216,
REG_RESERVED_10 = 217,
REG_RESERVED_11 = 218,
REG_RESERVED_12 = 219,
REG_RESERVED_13 = 220,
REG_RESERVED_14 = 221,
REG_RESERVED_15 = 222,
REG_RESERVED_16 = 223,
REG_RESERVED_17 = 224,
REG_RESERVED_18 = 225,
REG_RESERVED_19 = 226,
REG_RESERVED_20 = 227,
REG_RESERVED_21 = 228,
REG_RESERVED_22 = 229,
REG_RESERVED_23 = 230,
REG_RESERVED_24 = 231,
REG_RESERVED_25 = 232,
REG_RESERVED_26 = 233,
REG_RESERVED_27 = 234,
REG_RESERVED_28 = 235,
REG_RESERVED_29 = 236,
REG_RESERVED_30 = 237,
REG_RESERVED_31 = 238,
REG_RESERVED_32 = 239,
REG_POS_HALF = 240,
REG_NEG_HALF = 241,
REG_POS_ONE = 242,
REG_NEG_ONE = 243,
REG_POS_TWO = 244,
REG_NEG_TWO = 245,
REG_POS_FOUR = 246,
REG_NEG_FOUR = 247,
REG_PI = 248,
/* NOTE: SDWA and SWDA both refer to sub d-word addressing */
REG_SRC_SWDA = 249,
REG_SRC_DPP = 250,
REG_VCCZ = 251,
REG_EXECZ = 252,
REG_SCC = 253,
REG_LDS_DIRECT = 254,
REG_SRC_LITERAL = 255,
REG_VGPR_MIN = 256,
REG_VGPR_MAX = 511
};
constexpr size_t MaxOperandDwords(16);
const int NumVecElemPerVecReg(64);
// op selector values 129 - 192 correspond to const values 1 - 64
const int NumPosConstRegs = REG_INT_CONST_POS_MAX
- REG_INT_CONST_POS_MIN + 1;
// op selector values 193 - 208 correspond to const values -1 to -16
const int NumNegConstRegs = REG_INT_CONST_NEG_MAX
- REG_INT_CONST_NEG_MIN + 1;
const int BITS_PER_BYTE = 8;
const int BITS_PER_WORD = 16;
const int MSB_PER_BYTE = (BITS_PER_BYTE - 1);
const int MSB_PER_WORD = (BITS_PER_WORD - 1);
// typedefs for the various sizes/types of scalar regs
typedef uint8_t ScalarRegU8;
typedef int8_t ScalarRegI8;
typedef uint16_t ScalarRegU16;
typedef int16_t ScalarRegI16;
typedef uint32_t ScalarRegU32;
typedef int32_t ScalarRegI32;
typedef float ScalarRegF32;
typedef uint64_t ScalarRegU64;
typedef int64_t ScalarRegI64;
typedef double ScalarRegF64;
// typedefs for the various sizes/types of vector reg elements
typedef uint8_t VecElemU8;
typedef int8_t VecElemI8;
typedef uint16_t VecElemU16;
typedef int16_t VecElemI16;
typedef uint32_t VecElemU32;
typedef int32_t VecElemI32;
typedef float VecElemF32;
typedef uint64_t VecElemU64;
typedef int64_t VecElemI64;
typedef double VecElemF64;
const int DWORDSize = sizeof(VecElemU32);
/**
* Size of a single-precision register in DWORDs.
*/
const int RegSizeDWORDs = sizeof(VecElemU32) / DWORDSize;
// typedefs for the various sizes/types of vector regs
using VecRegU8 = ::VecRegT<VecElemU8, NumVecElemPerVecReg, false>;
using VecRegI8 = ::VecRegT<VecElemI8, NumVecElemPerVecReg, false>;
using VecRegU16 = ::VecRegT<VecElemU16, NumVecElemPerVecReg, false>;
using VecRegI16 = ::VecRegT<VecElemI16, NumVecElemPerVecReg, false>;
using VecRegU32 = ::VecRegT<VecElemU32, NumVecElemPerVecReg, false>;
using VecRegI32 = ::VecRegT<VecElemI32, NumVecElemPerVecReg, false>;
using VecRegF32 = ::VecRegT<VecElemF32, NumVecElemPerVecReg, false>;
using VecRegU64 = ::VecRegT<VecElemU64, NumVecElemPerVecReg, false>;
using VecRegI64 = ::VecRegT<VecElemI64, NumVecElemPerVecReg, false>;
using VecRegF64 = ::VecRegT<VecElemF64, NumVecElemPerVecReg, false>;
// non-writeable versions of vector regs
using ConstVecRegU8 = ::VecRegT<VecElemU8, NumVecElemPerVecReg, true>;
using ConstVecRegI8 = ::VecRegT<VecElemI8, NumVecElemPerVecReg, true>;
using ConstVecRegU16 = ::VecRegT<VecElemU16, NumVecElemPerVecReg, true>;
using ConstVecRegI16 = ::VecRegT<VecElemI16, NumVecElemPerVecReg, true>;
using ConstVecRegU32 = ::VecRegT<VecElemU32, NumVecElemPerVecReg, true>;
using ConstVecRegI32 = ::VecRegT<VecElemI32, NumVecElemPerVecReg, true>;
using ConstVecRegF32 = ::VecRegT<VecElemF32, NumVecElemPerVecReg, true>;
using ConstVecRegU64 = ::VecRegT<VecElemU64, NumVecElemPerVecReg, true>;
using ConstVecRegI64 = ::VecRegT<VecElemI64, NumVecElemPerVecReg, true>;
using ConstVecRegF64 = ::VecRegT<VecElemF64, NumVecElemPerVecReg, true>;
using VecRegContainerU8 = VecRegU8::Container;
using VecRegContainerU16 = VecRegU16::Container;
using VecRegContainerU32 = VecRegU32::Container;
using VecRegContainerU64 = VecRegU64::Container;
struct StatusReg
{
StatusReg() : SCC(0), SPI_PRIO(0), USER_PRIO(0), PRIV(0), TRAP_EN(0),
TTRACE_EN(0), EXPORT_RDY(0), EXECZ(0), VCCZ(0), IN_TG(0),
IN_BARRIER(0), HALT(0), TRAP(0), TTRACE_CU_EN(0), VALID(0),
ECC_ERR(0), SKIP_EXPORT(0), PERF_EN(0), COND_DBG_USER(0),
COND_DBG_SYS(0), ALLOW_REPLAY(0), INSTRUCTION_ATC(0), RESERVED(0),
MUST_EXPORT(0), RESERVED_1(0)
{
}
uint32_t SCC : 1;
uint32_t SPI_PRIO : 2;
uint32_t USER_PRIO : 2;
uint32_t PRIV : 1;
uint32_t TRAP_EN : 1;
uint32_t TTRACE_EN : 1;
uint32_t EXPORT_RDY : 1;
uint32_t EXECZ : 1;
uint32_t VCCZ : 1;
uint32_t IN_TG : 1;
uint32_t IN_BARRIER : 1;
uint32_t HALT : 1;
uint32_t TRAP : 1;
uint32_t TTRACE_CU_EN : 1;
uint32_t VALID : 1;
uint32_t ECC_ERR : 1;
uint32_t SKIP_EXPORT : 1;
uint32_t PERF_EN : 1;
uint32_t COND_DBG_USER : 1;
uint32_t COND_DBG_SYS : 1;
uint32_t ALLOW_REPLAY : 1;
uint32_t INSTRUCTION_ATC : 1;
uint32_t RESERVED : 3;
uint32_t MUST_EXPORT : 1;
uint32_t RESERVED_1 : 4;
};
std::string opSelectorToRegSym(int opIdx, int numRegs=0);
int opSelectorToRegIdx(int opIdx, int numScalarRegs);
bool isPosConstVal(int opIdx);
bool isNegConstVal(int opIdx);
bool isConstVal(int opIdx);
bool isLiteral(int opIdx);
bool isScalarReg(int opIdx);
bool isVectorReg(int opIdx);
bool isFlatScratchReg(int opIdx);
bool isExecMask(int opIdx);
bool isVccReg(int opIdx);
} // namespace VegaISA
#endif // __ARCH_VEGA_REGISTERS_HH__
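
The classification helpers declared at the end of this header are defined in registers.cc (not shown in this view). As a rough illustration of what they check, here is a standalone sketch of plausible range-based implementations over the OpSelector values; the real definitions may differ in detail:

    #include <cassert>

    // Selected values copied from the OpSelector enum above.
    enum : int
    {
        REG_INT_CONST_POS_MIN = 129, REG_INT_CONST_POS_MAX = 192,
        REG_INT_CONST_NEG_MIN = 193, REG_INT_CONST_NEG_MAX = 208,
        REG_SRC_LITERAL = 255,
        REG_VGPR_MIN = 256, REG_VGPR_MAX = 511
    };

    // Plausible range checks; not the changeset's actual definitions.
    bool isPosConstVal(int opIdx)
    { return opIdx >= REG_INT_CONST_POS_MIN && opIdx <= REG_INT_CONST_POS_MAX; }
    bool isNegConstVal(int opIdx)
    { return opIdx >= REG_INT_CONST_NEG_MIN && opIdx <= REG_INT_CONST_NEG_MAX; }
    bool isConstVal(int opIdx)
    { return isPosConstVal(opIdx) || isNegConstVal(opIdx); }
    bool isLiteral(int opIdx) { return opIdx == REG_SRC_LITERAL; }
    bool isVectorReg(int opIdx)
    { return opIdx >= REG_VGPR_MIN && opIdx <= REG_VGPR_MAX; }

    int main()
    {
        assert(isConstVal(129) && isConstVal(208) && !isConstVal(255));
        assert(isLiteral(255));
        assert(isVectorReg(256) && !isVectorReg(128));
        return 0;
    }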


@@ -0,0 +1,64 @@
/*
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARCH_VEGA_GPU_TYPES_HH__
#define __ARCH_VEGA_GPU_TYPES_HH__
#include <cstdint>
namespace VegaISA
{
union InstFormat;
/**
* used to represent a GPU inst in its raw format. VEGA
* instructions may be 32b or 64b, therefore we represent
* a raw inst with 64b to ensure that all of its inst data,
* including potential immediate values, may be represented
* in the worst case.
*/
typedef uint64_t RawMachInst;
/**
* used to represent the encoding of a VEGA inst. each portion
* of a VEGA inst must be 1 DWORD (32b), so we use a pointer
* to InstFormat type (which is 32b). for the case in which we
* need multiple DWORDs to represent a single inst, this pointer
* essentially acts as an array of the DWORDs needed to represent
* the entire inst encoding.
*/
typedef InstFormat *MachInst;
} // namespace VegaISA
#endif // __ARCH_VEGA_GPU_TYPES_HH__
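
To make the 64b choice concrete: a 32b VEGA encoding followed by a 32b literal fits in one RawMachInst, and the decoder can peel it apart one DWORD at a time, which is what the InstFormat-pointer MachInst enables. A small standalone sketch; the encoding and literal values here are made up for illustration:

    #include <cassert>
    #include <cstdint>

    typedef uint64_t RawMachInst;

    int main()
    {
        // Hypothetical raw inst: low DWORD is the instruction encoding,
        // high DWORD is a trailing 32b literal constant.
        const uint32_t enc = 0x7E000280; // made-up encoding bits
        const uint32_t lit = 0xDEADBEEF; // made-up literal
        RawMachInst raw = (RawMachInst(lit) << 32) | enc;

        // Peel the raw inst apart DWORD by DWORD, mirroring how a
        // MachInst (InstFormat*) walks a multi-DWORD encoding.
        assert(uint32_t(raw) == enc);
        assert(uint32_t(raw >> 32) == lit);
        return 0;
    }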


@@ -0,0 +1,58 @@
/*
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "arch/amdgpu/vega/insts/gpu_static_inst.hh"
#include "arch/amdgpu/vega/gpu_decoder.hh"
#include "arch/amdgpu/vega/insts/instructions.hh"
#include "debug/GPUExec.hh"
#include "gpu-compute/flexible_pool_manager.hh"
#include "gpu-compute/shader.hh"
namespace VegaISA
{
VEGAGPUStaticInst::VEGAGPUStaticInst(const std::string &opcode)
: GPUStaticInst(opcode), _srcLiteral(0)
{
}
VEGAGPUStaticInst::~VEGAGPUStaticInst()
{
}
void
VEGAGPUStaticInst::panicUnimplemented() const
{
fatal("Encountered unimplemented VEGA instruction: %s\n", _opcode);
}
} // namespace VegaISA


@@ -0,0 +1,94 @@
/*
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARCH_VEGA_INSTS_GPU_STATIC_INST_HH__
#define __ARCH_VEGA_INSTS_GPU_STATIC_INST_HH__
#include "arch/amdgpu/vega/gpu_registers.hh"
#include "arch/amdgpu/vega/operand.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/scalar_register_file.hh"
#include "gpu-compute/vector_register_file.hh"
#include "gpu-compute/wavefront.hh"
namespace VegaISA
{
class VEGAGPUStaticInst : public GPUStaticInst
{
public:
VEGAGPUStaticInst(const std::string &opcode);
~VEGAGPUStaticInst();
void generateDisassembly() override { disassembly = _opcode; }
bool
isFlatScratchRegister(int opIdx) override
{
return isFlatScratchReg(opIdx);
}
bool isScalarRegister(int opIdx) override { return false; }
bool isVectorRegister(int opIdx) override { return false; }
bool isSrcOperand(int opIdx) override { return false; }
bool isDstOperand(int opIdx) override { return false; }
int getOperandSize(int opIdx) override { return 0; }
int
getRegisterIndex(int opIdx, int num_scalar_regs) override
{
return 0;
}
/**
* Return the number of tokens needed by the coalescer. In VEGA there
* is generally one packet per memory request per lane generated. In
* HSAIL, the number of dest operands is used for loads and src
* operands for stores. This method should be overridden on a per-inst
* basis when this value differs.
*/
int coalescerTokenCount() const override { return 1; }
ScalarRegU32 srcLiteral() const override { return _srcLiteral; }
protected:
void panicUnimplemented() const;
/**
* if the instruction has a src literal - an immediate
* value that is part of the instruction stream - we
* store that here
*/
ScalarRegU32 _srcLiteral;
}; // class VEGAGPUStaticInst
} // namespace VegaISA
#endif //__ARCH_VEGA_INSTS_GPU_STATIC_INST_HH__


@@ -0,0 +1,894 @@
/*
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARCH_VEGA_INSTS_INST_UTIL_HH__
#define __ARCH_VEGA_INSTS_INST_UTIL_HH__
#include <cmath>
#include "arch/amdgpu/vega/gpu_registers.hh"
// values for SDWA select operations
enum SDWASelVals : int
{
SDWA_BYTE_0 = 0, /* select data[7:0] */
SDWA_BYTE_1 = 1, /* select data[15:8] */
SDWA_BYTE_2 = 2, /* select data[23:16] */
SDWA_BYTE_3 = 3, /* select data[31:24] */
SDWA_WORD_0 = 4, /* select data[15:0] */
SDWA_WORD_1 = 5, /* select data[31:16] */
SDWA_DWORD = 6 /* select data[31:0] */
};
// values for format of destination bits for SDWA operations
enum SDWADstVals : int
{
SDWA_UNUSED_PAD = 0, /* Pad all unused bits with 0 */
SDWA_UNUSED_SEXT = 1, /* Sign-extend upper bits; pad lower bits w/ 0 */
SDWA_UNUSED_PRESERVE = 2 /* select data[31:0] */
};
// values for DPP operations
enum SqDPPVals : int
{
SQ_DPP_QUAD_PERM_MAX = 0xFF,
SQ_DPP_RESERVED = 0x100,
SQ_DPP_ROW_SL1 = 0x101,
SQ_DPP_ROW_SL15 = 0x10F,
SQ_DPP_ROW_SR1 = 0x111,
SQ_DPP_ROW_SR15 = 0x11F,
SQ_DPP_ROW_RR1 = 0x121,
SQ_DPP_ROW_RR15 = 0x12F,
SQ_DPP_WF_SL1 = 0x130,
SQ_DPP_WF_RL1 = 0x134,
SQ_DPP_WF_SR1 = 0x138,
SQ_DPP_WF_RR1 = 0x13C,
SQ_DPP_ROW_MIRROR = 0x140,
SQ_DPP_ROW_HALF_MIRROR = 0x141,
SQ_DPP_ROW_BCAST15 = 0x142,
SQ_DPP_ROW_BCAST31 = 0x143
};
static const int ROW_SIZE = 16; /* 16 registers per row */
static const int NUM_BANKS = 4; /* 64 registers, 16/bank */
namespace VegaISA
{
template<typename T>
inline T
wholeQuadMode(T val)
{
T wqm = 0;
T mask = 0xF;
for (T bits = val; mask != 0; mask <<= 4)
if ((bits & mask) != 0)
wqm |= mask;
return wqm;
}
template<typename T>
inline T
quadMask(T val)
{
T qmsk = 0;
T mask = 0xF;
T qbit = 0x1;
for (T bits = val; mask != 0; mask <<= 4, qbit <<= 1) {
if (bits & mask) {
qmsk |= qbit;
}
}
return qmsk;
}
template<typename T>
inline ScalarRegI32
countZeroBits(T val)
{
ScalarRegI32 num_zeros
= std::numeric_limits<T>::digits - popCount(val);
return num_zeros;
}
template<typename T>
inline ScalarRegI32
findFirstZero(T val)
{
if (val == ~T(0)) {
return -1;
}
return findLsbSet(~val);
}
template<typename T>
inline ScalarRegI32
findFirstOne(T val)
{
if (!val) {
return -1;
}
return findLsbSet(val);
}
template<typename T>
inline ScalarRegI32
findFirstOneMsb(T val)
{
if (!val) {
return -1;
}
return findMsbSet(val);
}
template<typename T>
inline ScalarRegI32
countZeroBitsMsb(T val)
{
if (!val) {
return -1;
}
return std::numeric_limits<T>::digits - 1 - findMsbSet(val);
}
inline ScalarRegI32
firstOppositeSignBit(ScalarRegI32 val)
{
bool found(false);
bool sign_bit = (val & 0x80000000) != 0;
ScalarRegU32 tmp_val(0);
int count(0);
if (!val || val == -1) {
return -1;
}
for (int i = 0; i < std::numeric_limits<ScalarRegU32>::digits; ++i) {
tmp_val = val & (0x80000000 >> i);
if (!sign_bit) {
if (tmp_val) {
found = true;
break;
}
} else {
if (!tmp_val) {
found = true;
break;
}
}
++count;
}
if (found) {
return count;
} else {
return -1;
}
}
inline ScalarRegI32
firstOppositeSignBit(ScalarRegI64 val)
{
bool found(false);
bool sign_bit = (val & 0x8000000000000000ULL) != 0;
ScalarRegU64 tmp_val(0);
int count(0);
if (!val || val == -1) {
return -1;
}
for (int i = 0; i < std::numeric_limits<ScalarRegU64>::digits; ++i) {
tmp_val = val & (0x8000000000000000ULL >> i);
if (!sign_bit) {
if (tmp_val) {
found = true;
break;
}
} else {
if (!tmp_val) {
found = true;
break;
}
}
++count;
}
if (found) {
return count;
} else {
return -1;
}
}
template<typename T>
inline T
median(T val_0, T val_1, T val_2)
{
if (std::is_floating_point<T>::value) {
return std::fmax(std::fmin(val_0, val_1),
std::fmin(std::fmax(val_0, val_1), val_2));
} else {
return std::max(std::min(val_0, val_1),
std::min(std::max(val_0, val_1), val_2));
}
}
template <typename T>
inline T roundNearestEven(T val)
{
T int_part = 0;
T nearest_round = std::floor(val + 0.5);
if ((int)std::floor(val) % 2 == 0
&& std::modf(std::abs(val), &int_part) == 0.5) {
nearest_round = nearest_round - 1;
}
return nearest_round;
}
inline VecElemU32
muladd(VecElemU64 &dst, VecElemU32 val_0, VecElemU32 val_1,
VecElemU64 val_2)
{
__uint128_t u0 = (__uint128_t)val_0;
__uint128_t u1 = (__uint128_t)val_1;
__uint128_t u2 = (__uint128_t)val_2;
__uint128_t result = u0 * u1 + u2;
dst = (VecElemU64)result;
return (VecElemU32)(result >> 64) ? 1 : 0;
}
inline VecElemU32
muladd(VecElemI64 &dst, VecElemI32 val_0, VecElemI32 val_1,
VecElemI64 val_2)
{
__int128_t u0 = (__int128_t)val_0;
__int128_t u1 = (__int128_t)val_1;
__int128_t u2 = (__int128_t)val_2;
__int128_t result = u0 * u1 + u2;
dst = (VecElemI64)result;
return (VecElemU32)(result >> 64) ? 1 : 0;
}
/**
* dppInstImpl is a helper function that applies the given DPP_CTRL
* operation to the given vector register lane. The returned lane is the
* source lane to read from for this destination lane and DPP_CTRL word.
*
* Currently the values are:
* 0x0 - 0xFF: full permute of four threads
* 0x100: reserved
* 0x101 - 0x10F: row shift left by 1-15 threads
* 0x111 - 0x11F: row shift right by 1-15 threads
* 0x121 - 0x12F: row rotate right by 1-15 threads
* 0x130: wavefront left shift by 1 thread
* 0x134: wavefront left rotate by 1 thread
* 0x138: wavefront right shift by 1 thread
* 0x13C: wavefront right rotate by 1 thread
* 0x140: mirror threads within row
* 0x141: mirror threads within 1/2 row (8 threads)
* 0x142: broadcast 15th thread of each row to next row
* 0x143: broadcast thread 31 to rows 2 and 3
*/
int dppInstImpl(SqDPPVals dppCtrl, int currLane, int rowNum,
int rowOffset, bool & outOfBounds)
{
// local variables
// newLane will be the same as the input lane unless swizzling happens
int newLane = currLane;
// for shift/rotate permutations; positive values are LEFT rotates
int count = 1;
int localRowOffset = rowOffset;
int localRowNum = rowNum;
if (dppCtrl <= SQ_DPP_QUAD_PERM_MAX) { // DPP_QUAD_PERM{00:FF}
int quadBase = (currLane & ~(3));
int quadPix = (currLane & 3);
quadPix = ((dppCtrl >> (2 * quadPix)) & 3);
newLane = (quadBase | quadPix);
} else if (dppCtrl == SQ_DPP_RESERVED) {
panic("ERROR: instruction using reserved DPP_CTRL value\n");
} else if ((dppCtrl >= SQ_DPP_ROW_SL1) &&
(dppCtrl <= SQ_DPP_ROW_SL15)) { // DPP_ROW_SL{1:15}
count = (dppCtrl - SQ_DPP_ROW_SL1 + 1);
if ((localRowOffset + count >= 0) &&
(localRowOffset + count < ROW_SIZE)) {
localRowOffset += count;
newLane = (rowNum | localRowOffset);
} else {
outOfBounds = true;
}
} else if ((dppCtrl >= SQ_DPP_ROW_SR1) &&
(dppCtrl <= SQ_DPP_ROW_SR15)) { // DPP_ROW_SR{1:15}
count = -(dppCtrl - SQ_DPP_ROW_SR1 + 1);
if ((localRowOffset + count >= 0) &&
(localRowOffset + count < ROW_SIZE)) {
localRowOffset += count;
newLane = (rowNum | localRowOffset);
} else {
outOfBounds = true;
}
} else if ((dppCtrl >= SQ_DPP_ROW_RR1) &&
(dppCtrl <= SQ_DPP_ROW_RR15)) { // DPP_ROW_RR{1:15}
count = -(dppCtrl - SQ_DPP_ROW_RR1 + 1);
localRowOffset = (localRowOffset + count + ROW_SIZE) % ROW_SIZE;
newLane = (rowNum | localRowOffset);
} else if (dppCtrl == SQ_DPP_WF_SL1) { // DPP_WF_SL1
count = 1;
int currVal = (currLane + count);
if ((currVal >= 0) && (currVal < NumVecElemPerVecReg)) {
newLane += count;
} else {
outOfBounds = true;
}
} else if (dppCtrl == SQ_DPP_WF_RL1) { // DPP_WF_RL1
count = 1;
newLane = (currLane + count + NumVecElemPerVecReg) %
NumVecElemPerVecReg;
} else if (dppCtrl == SQ_DPP_WF_SR1) { // DPP_WF_SR1
count = -1;
int currVal = (currLane + count);
if ((currVal >= 0) && (currVal < NumVecElemPerVecReg)) {
newLane += count;
} else {
outOfBounds = true;
}
} else if (dppCtrl == SQ_DPP_WF_RR1) { // DPP_WF_RR1
count = -1;
newLane = (currLane + count + NumVecElemPerVecReg) %
NumVecElemPerVecReg;
} else if (dppCtrl == SQ_DPP_ROW_MIRROR) { // DPP_ROW_MIRROR
localRowOffset = (15 - localRowOffset);
newLane = (rowNum | localRowOffset);
} else if (dppCtrl == SQ_DPP_ROW_HALF_MIRROR) { // DPP_ROW_HALF_MIRROR
localRowNum = (currLane & ~0x7);
localRowOffset = (currLane & 0x7);
localRowOffset = (7 - localRowOffset);
newLane = (localRowNum | localRowOffset);
} else if (dppCtrl == SQ_DPP_ROW_BCAST15) { // DPP_ROW_BCAST15
count = 15;
if (currLane > count) {
newLane = (currLane & ~count) - 1;
}
} else if (dppCtrl == SQ_DPP_ROW_BCAST31) { // DPP_ROW_BCAST31
count = 31;
if (currLane > count) {
newLane = (currLane & ~count) - 1;
}
} else {
panic("Unimplemented DPP control operation: %d\n", dppCtrl);
}
return newLane;
}
/**
* processDPP is a helper function for implementing Data Parallel Primitive
* instructions. This function may be called by many different VOP1
* instructions to do operations within a register.
*/
template<typename T>
void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst,
T & src0)
{
// local variables
SqDPPVals dppCtrl = (SqDPPVals)dppInst.DPP_CTRL;
int boundCtrl = dppInst.BOUND_CTRL;
int bankMask = dppInst.BANK_MASK;
int rowMask = dppInst.ROW_MASK;
// row, bank info to be calculated per lane
int rowNum = 0, bankNum = 0, rowOffset = 0;
// outLane will be the same as the input lane unless swizzling happens
int outLane = 0;
bool laneDisabled = false;
// flags used for determining if a lane should be written to/reset/etc.
bool outOfBounds = false, zeroSrc = false;
long long threadValid = 0;
/**
* STEP 1a: check if the absolute value (ABS) or negation (NEG) tags
* are set. If so, do the appropriate action(s) on src0 and/or src1.
*
* NOTE: ABS takes priority over NEG.
*/
if (dppInst.SRC0_NEG) {
src0.negModifier();
}
if (dppInst.SRC0_ABS) {
src0.absModifier();
}
// iterate over all register lanes, performing steps 2-4
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
threadValid = (0x1LL << lane);
/**
* STEP 2: check the row and bank mask values. These determine
* which threads are enabled for the subsequent DPP_CTRL
* operations.
*/
rowNum = (lane / ROW_SIZE);
rowOffset = (lane % ROW_SIZE);
bankNum = (rowOffset / NUM_BANKS);
if (((rowMask & (0x1 << rowNum)) == 0) /* row mask */ ||
((bankMask & (0x1 << bankNum)) == 0) /* bank mask */) {
laneDisabled = true;
}
/**
* STEP 3: Handle the potential values of DPP_CTRL:
* 0x0 - 0xFF: full permute of four threads
* 0x100: reserved
* 0x101 - 0x10F: row shift left by 1-15 threads
* 0x111 - 0x11F: row shift right by 1-15 threads
* 0x121 - 0x12F: row rotate right by 1-15 threads
* 0x130: wavefront left shift by 1 thread
* 0x134: wavefront left rotate by 1 thread
* 0x138: wavefront right shift by 1 thread
* 0x13C: wavefront right rotate by 1 thread
* 0x140: mirror threads within row
* 0x141: mirror threads within 1/2 row (8 threads)
* 0x142: broadcast 15th thread of each row to next row
* 0x143: broadcast thread 31 to rows 2 and 3
*/
if (!laneDisabled) {
outLane = dppInstImpl(dppCtrl, lane, rowNum, rowOffset,
outOfBounds);
}
/**
* STEP 4: Implement bound control for disabled threads. If thread
* is disabled but boundCtrl is set, then we need to set the source
* data to 0 (i.e., set this lane to 0).
*/
if (laneDisabled) {
threadValid = 0;
} else if (outOfBounds) {
if (boundCtrl == 1) {
zeroSrc = true;
} else {
threadValid = 0;
}
} else if (!gpuDynInst->exec_mask[lane]) {
if (boundCtrl == 1) {
zeroSrc = true;
} else {
threadValid = 0;
}
}
if (threadValid != 0 && !outOfBounds && !zeroSrc) {
assert(!laneDisabled);
src0[outLane] = src0[lane];
} else if (zeroSrc) {
src0[lane] = 0;
}
// reset per-lane flags for next iteration
laneDisabled = false;
outOfBounds = false;
zeroSrc = false;
}
}
/**
* processDPP is a helper function for implementing Data Parallel Primitive
* instructions. This function may be called by many different
* VOP2/VOPC instructions to do operations within a register.
*/
template<typename T>
void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst,
T & src0, T & src1)
{
/**
* STEP 1b: check if the absolute value (ABS) or negation (NEG) tags
* are set. If so, do the appropriate action(s) on src0 and/or src1.
*
* NOTE: ABS takes priority over NEG.
*/
if (dppInst.SRC1_NEG) {
src1.negModifier();
}
if (dppInst.SRC1_ABS) {
src1.absModifier();
}
// Since the only difference between VOP1 and VOP2/VOPC instructions is
// SRC1, which is only used for negation/absolute value, call the other
// version to do everything else.
processDPP(gpuDynInst, dppInst, src0);
}
/**
* sdwaInstSrcImpl_helper contains the per-lane code for selecting the
* appropriate bytes/words of the lane and doing the appropriate
* masking/padding/sign extending. It returns the value after these
* operations are done on it.
*/
template<typename T>
T sdwaInstSrcImpl_helper(T currOperVal, const T origOperVal,
const SDWASelVals sel, const bool signExt)
{
// local variables
int low_bit = 0, high_bit = 0;
bool signExt_local = signExt;
T retVal = 0;
// if we're preserving all of the bits, then we can immediately return
if (sel == SDWA_DWORD) {
return currOperVal;
}
if (sel < SDWA_WORD_0) { // we are selecting 1 byte
/*
Process byte 0 first. This code either selects the original bits
of byte 0, or makes the bits of the selected byte be byte 0 (and
then either sign extends or zeros out upper bits).
*/
low_bit = (sel * VegaISA::BITS_PER_BYTE);
high_bit = low_bit + VegaISA::MSB_PER_BYTE;
retVal = bits(currOperVal, high_bit, low_bit);
// make sure update propagated, since used next
panic_if(bits(retVal, VegaISA::MSB_PER_BYTE) !=
bits(origOperVal, high_bit),
"ERROR: SDWA byte update not propagated: retVal: %d, "
"orig: %d\n", bits(retVal, VegaISA::MSB_PER_BYTE),
bits(origOperVal, high_bit));
// sign extended value depends on upper-most bit of the new byte 0
signExt_local = (signExt &&
(bits(retVal, VegaISA::MSB_PER_BYTE, 0) & 0x80));
// process all other bytes -- if sign extending, make them 1, else
// all 0's so leave as is
if (signExt_local) {
retVal = (uint32_t)sext<VegaISA::MSB_PER_BYTE>(retVal);
}
} else if (sel < SDWA_DWORD) { // we are selecting 1 word
/*
Process word 0 first. This code either selects the original bits
of word 0, or makes the bits of the selected word be word 0 (and
then either sign extends or zeros out upper bits).
*/
low_bit = (sel & 1) * VegaISA::BITS_PER_WORD;
high_bit = low_bit + VegaISA::MSB_PER_WORD;
retVal = bits(currOperVal, high_bit, low_bit);
// make sure update propagated, since used next
panic_if(bits(retVal, VegaISA::MSB_PER_WORD) !=
bits(origOperVal, high_bit),
"ERROR: SDWA word update not propagated: retVal: %d, "
"orig: %d\n",
bits(retVal, VegaISA::MSB_PER_WORD),
bits(origOperVal, high_bit));
// sign extended value depends on upper-most bit of the new word 0
signExt_local = (signExt &&
(bits(retVal, VegaISA::MSB_PER_WORD, 0) &
0x8000));
// process other word -- if sign extending, make them 1, else all
// 0's so leave as is
if (signExt_local) {
retVal = (uint32_t)sext<VegaISA::MSB_PER_WORD>(retVal);
}
} else {
assert(sel != SDWA_DWORD); // should have returned earlier
panic("Unimplemented SDWA select operation: %d\n", sel);
}
return retVal;
}
/**
* sdwaInstSrcImpl is a helper function that selects the appropriate
* bits/bytes for each lane of the inputted source operand of an SDWA
* instruction, does the appropriate masking/padding/sign extending for the
* non-selected bits/bytes, and updates the operands values with the
* resultant value.
*
* The desired behavior is:
* 1. Select the appropriate bits/bytes based on sel:
* 0 (SDWA_BYTE_0): select data[7:0]
* 1 (SDWA_BYTE_1): select data[15:8]
* 2 (SDWA_BYTE_2): select data[23:16]
* 3 (SDWA_BYTE_3): select data[31:24]
* 4 (SDWA_WORD_0): select data[15:0]
* 5 (SDWA_WORD_1): select data[31:16]
* 6 (SDWA_DWORD): select data[31:0]
* 2. if sign extend is set, then sign extend the value
*/
template<typename T>
void sdwaInstSrcImpl(T & currOper, T & origCurrOper,
const SDWASelVals sel, const bool signExt)
{
// iterate over all lanes, setting appropriate, selected value
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
currOper[lane] = sdwaInstSrcImpl_helper(currOper[lane],
origCurrOper[lane], sel,
signExt);
}
}
/**
* sdwaInstDstImpl_helper contains the per-lane code for selecting the
* appropriate bytes/words of the lane and doing the appropriate
* masking/padding/sign extending. It returns the value after these
* operations are done on it.
*/
template<typename T>
T sdwaInstDstImpl_helper(T currDstVal, const T origDstVal,
const bool clamp, const SDWASelVals sel,
const SDWADstVals unusedBits_format)
{
// local variables
int low_bit = 0, high_bit = 0;
bool signExt = (unusedBits_format == SDWA_UNUSED_SEXT);
//bool pad = (unusedBits_format == SDWA_UNUSED_PAD);
bool preserve = (unusedBits_format == SDWA_UNUSED_PRESERVE);
T retVal = 0, origBits_thisByte = 0, currBits_thisByte = 0,
origBits_thisWord = 0, currBits_thisWord = 0, newBits = 0;
// if we're preserving all of the bits, then we can immediately return
if (unusedBits_format == SDWA_UNUSED_PRESERVE) {
assert(sel == SDWA_DWORD);
return currDstVal;
} else if (sel == SDWA_DWORD) {
// NOTE: users may set the unused bits variable to anything in this
// scenario, because it will be ignored
return currDstVal;
}
if (sel < SDWA_WORD_0) { // we are selecting 1 byte
// whether we sign extend depends on the upper-most bit of byte 0
signExt = (signExt &&
(bits(currDstVal, VegaISA::MSB_PER_WORD, 0) & 0x80));
for (int byte = 0; byte < 4; ++byte) {
low_bit = byte * VegaISA::BITS_PER_BYTE;
high_bit = low_bit + VegaISA::MSB_PER_BYTE;
/*
Options:
1. byte == sel: we are keeping all bits in this byte
2. preserve is set: keep this byte as is because the
output preserve flag is set
3. byte > sel && signExt: we're sign extending and
this byte is one of the bytes we need to sign extend
*/
origBits_thisByte = bits(origDstVal, high_bit, low_bit);
currBits_thisByte = bits(currDstVal, high_bit, low_bit);
newBits = ((byte == sel) ? origBits_thisByte :
((preserve) ? currBits_thisByte :
(((byte > sel) && signExt) ? 0xff : 0)));
retVal = insertBits(retVal, high_bit, low_bit, newBits);
}
} else if (sel < SDWA_DWORD) { // we are selecting 1 word
low_bit = 0;
high_bit = low_bit + VegaISA::MSB_PER_WORD;
// whether we sign extend depends on the upper-most bit of word 0
signExt = (signExt &&
(bits(currDstVal, high_bit, low_bit) & 0x8000));
for (int word = 0; word < 2; ++word) {
low_bit = word * VegaISA::BITS_PER_WORD;
high_bit = low_bit + VegaISA::MSB_PER_WORD;
/*
Options:
1. word == sel & 1: we are keeping all bits in this word
2. preserve is set: keep this word as is because the
output preserve flag is set
3. word > (sel & 1) && signExt: we're sign extending and
this word is one of the words we need to sign extend
*/
origBits_thisWord = bits(origDstVal, high_bit, low_bit);
currBits_thisWord = bits(currDstVal, high_bit, low_bit);
newBits = ((word == (sel & 0x1)) ? origBits_thisWord :
((preserve) ? currBits_thisWord :
(((word > (sel & 0x1)) && signExt) ? 0xffff : 0)));
retVal = insertBits(retVal, high_bit, low_bit, newBits);
}
} else {
assert(sel != SDWA_DWORD); // should have returned earlier
panic("Unimplemented SDWA select operation: %d\n", sel);
}
return retVal;
}
/**
* sdwaInstDstImpl is a helper function that selects the appropriate
* bits/bytes for the inputted dest operand of an SDWA instruction, does
* the appropriate masking/padding/sign extending for the non-selected
* bits/bytes, and updates the operands values with the resultant value.
*
* The desired behavior is:
* 1. Select the appropriate bits/bytes based on sel:
* 0 (SDWA_BYTE_0): select data[7:0]
* 1 (SDWA_BYTE_1): select data[15:8]
* 2 (SDWA_BYTE_2): select data[23:16]
* 3 (SDWA_BYTE_3): select data[31:24]
* 4 (SDWA_WORD_0): select data[15:0]
* 5 (SDWA_WORD_1): select data[31:16]
* 6 (SDWA_DWORD): select data[31:0]
* 2. either pad, sign extend, or select all bits based on the value of
* unusedBits_format:
* 0 (SDWA_UNUSED_PAD): pad all unused bits with 0
* 1 (SDWA_UNUSED_SEXT): sign-extend upper bits; pad lower bits w/ 0
* 2 (SDWA_UNUSED_PRESERVE): select data[31:0]
*/
template<typename T>
void sdwaInstDstImpl(T & dstOper, T & origDstOper, const bool clamp,
const SDWASelVals sel,
const SDWADstVals unusedBits_format)
{
// iterate over all lanes, setting appropriate, selected value
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
dstOper[lane] = sdwaInstDstImpl_helper(dstOper[lane],
origDstOper[lane], clamp,
sel, unusedBits_format);
}
}
/**
* processSDWA_src_helper is a helper function for implementing sub d-word
* addressing instructions for the src operands. This function may be
* called by many different VOP1/VOP2/VOPC instructions to do operations
* within a register. This function is also agnostic of which operand it
* is operating on, so that it can be called for any src operand.
*/
template<typename T>
void processSDWA_src_helper(T & currSrc, T & origCurrSrc,
const SDWASelVals src_sel,
const bool src_signExt, const bool src_abs,
const bool src_neg)
{
/**
* STEP 1: check if the absolute value (ABS) or negation (NEG) tags
* are set. If so, do the appropriate action(s) on the src operand.
*
* NOTE: According to the CSim implementation, ABS takes priority over
* NEG.
*/
if (src_neg) {
currSrc.negModifier();
}
if (src_abs) {
currSrc.absModifier();
}
/**
* STEP 2: select the appropriate bits for each lane of source operand.
*/
sdwaInstSrcImpl(currSrc, origCurrSrc, src_sel, src_signExt);
}
/**
* processSDWA_src is a helper function for implementing sub d-word
* addressing instructions for the src operands. This function may be
* called by many different VOP1 instructions to do operations within a
* register. processSDWA_dst is called after the math, while
* processSDWA_src is called before the math.
*/
template<typename T>
void processSDWA_src(InFmt_VOP_SDWA sdwaInst, T & src0, T & origSrc0)
{
// local variables
const SDWASelVals src0_sel = (SDWASelVals)sdwaInst.SRC0_SEL;
const bool src0_signExt = sdwaInst.SRC0_SEXT;
const bool src0_neg = sdwaInst.SRC0_NEG;
const bool src0_abs = sdwaInst.SRC0_ABS;
// NOTE: difference between VOP1 and VOP2/VOPC is that there is no src1
// operand. So ensure that SRC1 fields are not set, then call helper
// function only on src0.
assert(!sdwaInst.SRC1_SEXT);
assert(!sdwaInst.SRC1_NEG);
assert(!sdwaInst.SRC1_ABS);
processSDWA_src_helper(src0, origSrc0, src0_sel, src0_signExt,
src0_abs, src0_neg);
}
/**
* processSDWA_src is a helper function for implementing sub d-word
* addressing instructions. This function may be called by many different
* VOP2/VOPC instructions to do operations within a register.
* processSDWA_dst is called after the math, while processSDWA_src is
* called before the math.
*/
template<typename T>
void processSDWA_src(InFmt_VOP_SDWA sdwaInst, T & src0, T & origSrc0,
T & src1, T & origSrc1)
{
// local variables
const SDWASelVals src0_sel = (SDWASelVals)sdwaInst.SRC0_SEL;
const bool src0_signExt = sdwaInst.SRC0_SEXT;
const bool src0_neg = sdwaInst.SRC0_NEG;
const bool src0_abs = sdwaInst.SRC0_ABS;
const SDWASelVals src1_sel = (SDWASelVals)sdwaInst.SRC1_SEL;
const bool src1_signExt = sdwaInst.SRC1_SEXT;
const bool src1_neg = sdwaInst.SRC1_NEG;
const bool src1_abs = sdwaInst.SRC1_ABS;
processSDWA_src_helper(src0, origSrc0, src0_sel, src0_signExt,
src0_abs, src0_neg);
processSDWA_src_helper(src1, origSrc1, src1_sel, src1_signExt,
src1_abs, src1_neg);
}
/**
* processSDWA_dst is a helper function for implementing sub d-word
* addressing instructions for the dst operand. This function may be
* called by many different VOP1/VOP2/VOPC instructions to do operations
* within a register. processSDWA_dst is called after the math, while
* processSDWA_src is called before the math.
*/
template<typename T>
void processSDWA_dst(InFmt_VOP_SDWA sdwaInst, T & dst, T & origDst)
{
// local variables
const SDWADstVals dst_unusedBits_format =
(SDWADstVals)sdwaInst.DST_UNUSED;
const SDWASelVals dst_sel = (SDWASelVals)sdwaInst.DST_SEL;
const bool clamp = sdwaInst.CLAMP;
/**
* STEP 1: select the appropriate bits for dst and pad/sign-extend as
* appropriate.
*/
sdwaInstDstImpl(dst, origDst, clamp, dst_sel, dst_unusedBits_format);
}
} // namespace VegaISA
#endif // __ARCH_VEGA_INSTS_INST_UTIL_HH__
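
Of the DPP_CTRL cases in dppInstImpl, the full quad permute is the easiest to sanity-check by hand: each destination lane's 2-bit selector in dppCtrl picks a source lane within its four-lane quad. Below is a standalone copy of just that branch, exercised with assumed control words:

    #include <cassert>

    // Standalone copy of the DPP_QUAD_PERM branch of dppInstImpl(): the
    // four 2-bit fields of dppCtrl select a source lane within each quad.
    int quadPermLane(int dppCtrl, int currLane)
    {
        int quadBase = (currLane & ~(3));           // first lane of this quad
        int quadPix = (currLane & 3);               // position within the quad
        quadPix = ((dppCtrl >> (2 * quadPix)) & 3); // 2-bit source selector
        return (quadBase | quadPix);
    }

    int main()
    {
        // dppCtrl 0x1B = 0b00011011 reverses each quad: lanes read (3,2,1,0).
        assert(quadPermLane(0x1B, 0) == 3);
        assert(quadPermLane(0x1B, 1) == 2);
        assert(quadPermLane(0x1B, 5) == 6); // second quad: lane 5 reads lane 6
        // dppCtrl 0x00 broadcasts lane 0 of each quad to all four lanes.
        assert(quadPermLane(0x00, 6) == 4);
        return 0;
    }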

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,834 @@
/*
* Copyright (c) 2016-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
#define __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
#include "arch/amdgpu/vega/gpu_decoder.hh"
#include "arch/amdgpu/vega/gpu_mem_helpers.hh"
#include "arch/amdgpu/vega/insts/gpu_static_inst.hh"
#include "arch/amdgpu/vega/operand.hh"
#include "debug/GPUExec.hh"
#include "debug/VEGA.hh"
#include "mem/ruby/system/RubySystem.hh"
namespace VegaISA
{
struct BufferRsrcDescriptor
{
uint64_t baseAddr : 48;
uint32_t stride : 14;
uint32_t cacheSwizzle : 1;
uint32_t swizzleEn : 1;
uint32_t numRecords : 32;
uint32_t dstSelX : 3;
uint32_t dstSelY : 3;
uint32_t dstSelZ : 3;
uint32_t dstSelW : 3;
uint32_t numFmt : 3;
uint32_t dataFmt : 4;
uint32_t elemSize : 2;
uint32_t idxStride : 2;
uint32_t addTidEn : 1;
uint32_t atc : 1;
uint32_t hashEn : 1;
uint32_t heap : 1;
uint32_t mType : 3;
uint32_t type : 2;
};
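/**
 * Usage sketch (illustrative): a buffer resource descriptor (V#) arrives
 * as four consecutive SGPRs read through a ConstScalarOperandU128;
 * memcpy'ing its raw data into the packed struct above recovers the
 * individual fields:
 *
 *     BufferRsrcDescriptor rsrc_desc;
 *     std::memcpy(&rsrc_desc, s_rsrc_desc.rawDataPtr(),
 *                 sizeof(BufferRsrcDescriptor));
 *     Addr base = rsrc_desc.baseAddr;   // 48b base address
 *
 * as done by the SMEM and MUBUF calcAddr() helpers below.
 */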
// --- purely virtual instruction classes ---
class Inst_SOP2 : public VEGAGPUStaticInst
{
public:
Inst_SOP2(InFmt_SOP2*, const std::string &opcode);
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
InFmt_SOP2 instData;
// possible second DWORD
InstFormat extData;
uint32_t varSize;
private:
bool hasSecondDword(InFmt_SOP2 *);
}; // Inst_SOP2
class Inst_SOPK : public VEGAGPUStaticInst
{
public:
Inst_SOPK(InFmt_SOPK*, const std::string &opcode);
~Inst_SOPK();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
InFmt_SOPK instData;
// possible second DWORD
InstFormat extData;
uint32_t varSize;
private:
bool hasSecondDword(InFmt_SOPK *);
}; // Inst_SOPK
class Inst_SOP1 : public VEGAGPUStaticInst
{
public:
Inst_SOP1(InFmt_SOP1*, const std::string &opcode);
~Inst_SOP1();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
InFmt_SOP1 instData;
// possible second DWORD
InstFormat extData;
uint32_t varSize;
private:
bool hasSecondDword(InFmt_SOP1 *);
}; // Inst_SOP1
class Inst_SOPC : public VEGAGPUStaticInst
{
public:
Inst_SOPC(InFmt_SOPC*, const std::string &opcode);
~Inst_SOPC();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
InFmt_SOPC instData;
// possible second DWORD
InstFormat extData;
uint32_t varSize;
private:
bool hasSecondDword(InFmt_SOPC *);
}; // Inst_SOPC
class Inst_SOPP : public VEGAGPUStaticInst
{
public:
Inst_SOPP(InFmt_SOPP*, const std::string &opcode);
~Inst_SOPP();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
InFmt_SOPP instData;
}; // Inst_SOPP
class Inst_SMEM : public VEGAGPUStaticInst
{
public:
Inst_SMEM(InFmt_SMEM*, const std::string &opcode);
~Inst_SMEM();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
/**
* initiate a memory read access for N dwords
*/
template<int N>
void
initMemRead(GPUDynInstPtr gpuDynInst)
{
initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
MemCmd::ReadReq);
}
/**
* initiate a memory write access for N dwords
*/
template<int N>
void
initMemWrite(GPUDynInstPtr gpuDynInst)
{
initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
MemCmd::WriteReq);
}
/**
* For normal s_load_dword/s_store_dword instruction addresses.
*/
void
calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr,
ScalarRegU32 offset)
{
Addr vaddr = ((addr.rawData() + offset) & ~0x3);
gpu_dyn_inst->scalarAddr = vaddr;
}
/**
* For s_buffer_load_dword/s_buffer_store_dword instruction addresses.
* The s_buffer instructions use the same buffer resource descriptor
* as the MUBUF instructions.
*/
void
calcAddr(GPUDynInstPtr gpu_dyn_inst,
ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
{
BufferRsrcDescriptor rsrc_desc;
ScalarRegU32 clamped_offset(offset);
std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
sizeof(BufferRsrcDescriptor));
/**
* The address is clamped if:
* Stride is zero: clamp if offset >= num_records
* Stride is non-zero: clamp if offset > (stride * num_records)
*/
if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
clamped_offset = rsrc_desc.numRecords;
} else if (rsrc_desc.stride && offset
> (rsrc_desc.stride * rsrc_desc.numRecords)) {
clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
}
Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
gpu_dyn_inst->scalarAddr = vaddr;
}
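/**
 * Worked example (illustrative values): with stride == 0 and
 * numRecords == 64, an offset of 100 is clamped to 64, so the access
 * resolves to (baseAddr + 64) & ~0x3. The final & ~0x3 forces dword
 * alignment in both the clamped and unclamped cases.
 */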
// first instruction DWORD
InFmt_SMEM instData;
// second instruction DWORD
InFmt_SMEM_1 extData;
}; // Inst_SMEM
class Inst_VOP2 : public VEGAGPUStaticInst
{
public:
Inst_VOP2(InFmt_VOP2*, const std::string &opcode);
~Inst_VOP2();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
InFmt_VOP2 instData;
// possible second DWORD
InstFormat extData;
uint32_t varSize;
private:
bool hasSecondDword(InFmt_VOP2 *);
}; // Inst_VOP2
class Inst_VOP1 : public VEGAGPUStaticInst
{
public:
Inst_VOP1(InFmt_VOP1*, const std::string &opcode);
~Inst_VOP1();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
InFmt_VOP1 instData;
// possible second DWORD
InstFormat extData;
uint32_t varSize;
private:
bool hasSecondDword(InFmt_VOP1 *);
}; // Inst_VOP1
class Inst_VOPC : public VEGAGPUStaticInst
{
public:
Inst_VOPC(InFmt_VOPC*, const std::string &opcode);
~Inst_VOPC();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
InFmt_VOPC instData;
// possible second DWORD
InstFormat extData;
uint32_t varSize;
private:
bool hasSecondDword(InFmt_VOPC *);
}; // Inst_VOPC
class Inst_VINTRP : public VEGAGPUStaticInst
{
public:
Inst_VINTRP(InFmt_VINTRP*, const std::string &opcode);
~Inst_VINTRP();
int instSize() const override;
protected:
// first instruction DWORD
InFmt_VINTRP instData;
}; // Inst_VINTRP
class Inst_VOP3 : public VEGAGPUStaticInst
{
public:
Inst_VOP3(InFmt_VOP3*, const std::string &opcode, bool sgpr_dst);
~Inst_VOP3();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
InFmt_VOP3 instData;
// second instruction DWORD
InFmt_VOP3_1 extData;
private:
bool hasSecondDword(InFmt_VOP3 *);
/**
* the v_cmp and readlane instructions in the VOP3
* encoding are unique because they are the only
* instructions that use the VDST field to specify
* a scalar register destination. for VOP3::V_CMP insts
* VDST specifies the arbitrary SGPR pair used to write
* VCC. for V_READLANE VDST specifies the SGPR to return
* the value of the selected lane in the source VGPR
* from which we are reading.
*/
const bool sgprDst;
}; // Inst_VOP3
class Inst_VOP3_SDST_ENC : public VEGAGPUStaticInst
{
public:
Inst_VOP3_SDST_ENC(InFmt_VOP3_SDST_ENC*, const std::string &opcode);
~Inst_VOP3_SDST_ENC();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
InFmt_VOP3_SDST_ENC instData;
// second instruction DWORD
InFmt_VOP3_1 extData;
private:
bool hasSecondDword(InFmt_VOP3_SDST_ENC *);
}; // Inst_VOP3_SDST_ENC
class Inst_DS : public VEGAGPUStaticInst
{
public:
Inst_DS(InFmt_DS*, const std::string &opcode);
~Inst_DS();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
template<typename T>
void
initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
{
Wavefront *wf = gpuDynInst->wavefront();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
Addr vaddr = gpuDynInst->addr[lane] + offset;
(reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
= wf->ldsChunk->read<T>(vaddr);
}
}
}
template<typename T>
void
initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
{
Wavefront *wf = gpuDynInst->wavefront();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
(reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
= wf->ldsChunk->read<T>(vaddr0);
(reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
= wf->ldsChunk->read<T>(vaddr1);
}
}
}
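// Data layout note (illustrative): for a two-address read such as
// ds_read2_b32 with byte offsets offset0 = 0 and offset1 = 4, lane L
// receives
//     d_data[2L]     = LDS[addr[L] + 0]
//     d_data[2L + 1] = LDS[addr[L] + 4]
// i.e., the two dwords are interleaved per lane rather than stored as
// two contiguous vectors.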
template<typename T>
void
initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
{
Wavefront *wf = gpuDynInst->wavefront();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
Addr vaddr = gpuDynInst->addr[lane] + offset;
wf->ldsChunk->write<T>(vaddr,
(reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
}
}
}
template<typename T>
void
initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
{
Wavefront *wf = gpuDynInst->wavefront();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
wf->ldsChunk->write<T>(vaddr0, (reinterpret_cast<T*>(
gpuDynInst->d_data))[lane * 2]);
wf->ldsChunk->write<T>(vaddr1, (reinterpret_cast<T*>(
gpuDynInst->d_data))[lane * 2 + 1]);
}
}
}
void
calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
{
Wavefront *wf = gpuDynInst->wavefront();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
gpuDynInst->addr.at(lane) = (Addr)addr[lane];
}
}
}
// first instruction DWORD
InFmt_DS instData;
// second instruction DWORD
InFmt_DS_1 extData;
}; // Inst_DS
class Inst_MUBUF : public VEGAGPUStaticInst
{
public:
Inst_MUBUF(InFmt_MUBUF*, const std::string &opcode);
~Inst_MUBUF();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
template<typename T>
void
initMemRead(GPUDynInstPtr gpuDynInst)
{
// temporarily modify exec_mask to suppress memory accesses to oob
// regions. Only issue memory requests for lanes that have their
// exec_mask set and are not out of bounds.
VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask &= ~oobMask;
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
gpuDynInst->exec_mask = old_exec_mask;
}
template<int N>
void
initMemRead(GPUDynInstPtr gpuDynInst)
{
// temporarily modify exec_mask to suppress memory accesses to oob
// regions. Only issue memory requests for lanes that have their
// exec_mask set and are not out of bounds.
VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask &= ~oobMask;
initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
gpuDynInst->exec_mask = old_exec_mask;
}
template<typename T>
void
initMemWrite(GPUDynInstPtr gpuDynInst)
{
// temporarily modify exec_mask to suppress memory accesses to oob
// regions. Only issue memory requests for lanes that have their
// exec_mask set and are not out of bounds.
VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask &= ~oobMask;
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
gpuDynInst->exec_mask = old_exec_mask;
}
template<int N>
void
initMemWrite(GPUDynInstPtr gpuDynInst)
{
// temporarily modify exec_mask to suppress memory accesses to oob
// regions. Only issue memory requests for lanes that have their
// exec_mask set and are not out of bounds.
VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask &= ~oobMask;
initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
gpuDynInst->exec_mask = old_exec_mask;
}
void
injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
{
// create request and set flags
gpuDynInst->resetEntireStatusVector();
gpuDynInst->setStatusVector(0, 1);
auto req = std::make_shared<Request>(0, 0, 0, 0,
gpuDynInst->computeUnit()->masterId(),
0, gpuDynInst->wfDynId);
gpuDynInst->setRequestFlags(req);
gpuDynInst->computeUnit()->
injectGlobalMemFence(gpuDynInst, false, req);
}
/**
* MUBUF instructions calculate their addresses as follows:
*
* index = (IDXEN ? vgpr_idx : 0) + (const_add_tid_en ? TID : 0)
* offset = (OFFEN ? vgpr_off : 0) + inst_off
*
* / ====================== LINEAR ADDRESSING ====================== /
* VADDR = base + sgpr_off + offset + stride * index
*
* / ===================== SWIZZLED ADDRESSING ===================== /
* index_msb = index / const_index_stride
* index_lsb = index % const_index_stride
* offset_msb = offset / const_element_size
* offset_lsb = offset % const_element_size
* buffer_offset = ((index_msb * stride + offset_msb *
* const_element_size) * const_index_stride +
* index_lsb * const_element_size + offset_lsb)
*
* VADDR = base + sgpr_off + buffer_offset
*/
template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
void
calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
{
Addr vaddr = 0;
Addr base_addr = 0;
Addr stride = 0;
Addr buf_idx = 0;
Addr buf_off = 0;
BufferRsrcDescriptor rsrc_desc;
std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
sizeof(BufferRsrcDescriptor));
base_addr = rsrc_desc.baseAddr;
stride = rsrc_desc.addTidEn ? ((rsrc_desc.dataFmt << 14)
+ rsrc_desc.stride) : rsrc_desc.stride;
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
vaddr = base_addr + s_offset.rawData();
/**
* first we calculate the buffer's index and offset.
* these will be used for either linear or swizzled
* buffers.
*/
buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);
buf_off = v_off[lane] + inst_offset;
/**
* Range check behavior causes out-of-range accesses
* to be treated differently. Out-of-range accesses return
* 0 for loads and are ignored for stores. For
* non-formatted accesses, this is done on a per-lane
* basis.
*/
if (stride == 0 || !rsrc_desc.swizzleEn) {
if (buf_off + stride * buf_idx >=
rsrc_desc.numRecords - s_offset.rawData()) {
DPRINTF(VEGA, "mubuf out-of-bounds condition 1: "
"lane = %d, buffer_offset = %llx, "
"const_stride = %llx, "
"const_num_records = %llx\n",
lane, buf_off + stride * buf_idx,
stride, rsrc_desc.numRecords);
oobMask.set(lane);
continue;
}
}
if (stride != 0 && rsrc_desc.swizzleEn) {
if (buf_idx >= rsrc_desc.numRecords ||
buf_off >= stride) {
DPRINTF(VEGA, "mubuf out-of-bounds condition 2: "
"lane = %d, offset = %llx, "
"index = %llx, "
"const_num_records = %llx\n",
lane, buf_off, buf_idx,
rsrc_desc.numRecords);
oobMask.set(lane);
continue;
}
}
if (rsrc_desc.swizzleEn) {
Addr idx_stride = 8 << rsrc_desc.idxStride;
Addr elem_size = 2 << rsrc_desc.elemSize;
Addr idx_msb = buf_idx / idx_stride;
Addr idx_lsb = buf_idx % idx_stride;
Addr off_msb = buf_off / elem_size;
Addr off_lsb = buf_off % elem_size;
DPRINTF(VEGA, "mubuf swizzled lane %d: "
"idx_stride = %llx, elem_size = %llx, "
"idx_msb = %llx, idx_lsb = %llx, "
"off_msb = %llx, off_lsb = %llx\n",
lane, idx_stride, elem_size, idx_msb, idx_lsb,
off_msb, off_lsb);
vaddr += ((idx_msb * stride + off_msb * elem_size)
* idx_stride + idx_lsb * elem_size + off_lsb);
} else {
vaddr += buf_off + stride * buf_idx;
}
DPRINTF(VEGA, "Calculating mubuf address for lane %d: "
"vaddr = %llx, base_addr = %llx, "
"stride = %llx, buf_idx = %llx, buf_off = %llx\n",
lane, vaddr, base_addr, stride,
buf_idx, buf_off);
gpuDynInst->addr.at(lane) = vaddr;
}
}
}
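/**
 * Worked example of the swizzled path (illustrative numbers only):
 * idxStride = 0 and elemSize = 1 give idx_stride = 8 and elem_size = 4.
 * With stride = 16, buf_idx = 10, and buf_off = 6:
 *
 *     idx_msb = 10 / 8 = 1,  idx_lsb = 10 % 8 = 2
 *     off_msb =  6 / 4 = 1,  off_lsb =  6 % 4 = 2
 *     buffer_offset = (1 * 16 + 1 * 4) * 8 + 2 * 4 + 2 = 170
 *
 * so the lane's vaddr is base_addr + s_offset + 170.
 */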
// first instruction DWORD
InFmt_MUBUF instData;
// second instruction DWORD
InFmt_MUBUF_1 extData;
// Mask of lanes with out-of-bounds accesses. Needs to be tracked
// separately from the exec_mask so that we remember to write zero
// to the registers associated with out of bounds lanes.
VectorMask oobMask;
}; // Inst_MUBUF
class Inst_MTBUF : public VEGAGPUStaticInst
{
public:
Inst_MTBUF(InFmt_MTBUF*, const std::string &opcode);
~Inst_MTBUF();
int instSize() const override;
protected:
// first instruction DWORD
InFmt_MTBUF instData;
// second instruction DWORD
InFmt_MTBUF_1 extData;
private:
bool hasSecondDword(InFmt_MTBUF *);
}; // Inst_MTBUF
class Inst_MIMG : public VEGAGPUStaticInst
{
public:
Inst_MIMG(InFmt_MIMG*, const std::string &opcode);
~Inst_MIMG();
int instSize() const override;
protected:
// first instruction DWORD
InFmt_MIMG instData;
// second instruction DWORD
InFmt_MIMG_1 extData;
}; // Inst_MIMG
class Inst_EXP : public VEGAGPUStaticInst
{
public:
Inst_EXP(InFmt_EXP*, const std::string &opcode);
~Inst_EXP();
int instSize() const override;
protected:
// first instruction DWORD
InFmt_EXP instData;
// second instruction DWORD
InFmt_EXP_1 extData;
}; // Inst_EXP
class Inst_FLAT : public VEGAGPUStaticInst
{
public:
Inst_FLAT(InFmt_FLAT*, const std::string &opcode);
~Inst_FLAT();
int instSize() const override;
void generateDisassembly() override;
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
template<typename T>
void
initMemRead(GPUDynInstPtr gpuDynInst)
{
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
}
template<int N>
void
initMemRead(GPUDynInstPtr gpuDynInst)
{
initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
}
template<typename T>
void
initMemWrite(GPUDynInstPtr gpuDynInst)
{
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
}
template<int N>
void
initMemWrite(GPUDynInstPtr gpuDynInst)
{
initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
}
template<typename T>
void
initAtomicAccess(GPUDynInstPtr gpuDynInst)
{
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
}
void
calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr)
{
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (gpuDynInst->exec_mask[lane]) {
gpuDynInst->addr.at(lane) = addr[lane];
}
}
gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
}
// first instruction DWORD
InFmt_FLAT instData;
// second instruction DWORD
InFmt_FLAT_1 extData;
}; // Inst_FLAT
} // namespace VegaISA
#endif // __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__

src/arch/amdgpu/vega/isa.cc

@@ -0,0 +1,101 @@
/*
* Copyright (c) 2016-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "arch/amdgpu/vega/gpu_isa.hh"
#include <numeric>
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/wavefront.hh"
namespace VegaISA
{
GPUISA::GPUISA(Wavefront &wf) : wavefront(wf), m0(0)
{
}
ScalarRegU32
GPUISA::readMiscReg(int opIdx) const
{
switch (opIdx) {
case REG_M0:
return m0;
case REG_ZERO:
return 0;
case REG_SCC:
return statusReg.SCC;
default:
fatal("attempting to read from unsupported or non-readable "
"register. selector val: %i\n", opIdx);
return 0;
}
}
void
GPUISA::writeMiscReg(int opIdx, ScalarRegU32 operandVal)
{
switch (opIdx) {
case REG_M0:
m0 = operandVal;
break;
case REG_SCC:
statusReg.SCC = operandVal ? 1 : 0;
break;
default:
fatal("attempting to write to an unsupported or non-writable "
"register. selector val: %i\n", opIdx);
break;
}
}
void
GPUISA::advancePC(GPUDynInstPtr gpuDynInst)
{
wavefront.pc(wavefront.pc()
+ gpuDynInst->staticInstruction()->instSize());
}
const std::array<const ScalarRegU32, NumPosConstRegs>
GPUISA::posConstRegs = { {
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
} };
const std::array<const ScalarRegI32, NumNegConstRegs>
GPUISA::negConstRegs = { {
-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
-16
} };
} // namespace VegaISA
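/**
 * Illustrative only (not part of this file): how the misc-register
 * interface above is exercised. A scalar operand whose op selector is
 * not an SGPR funnels writes through writeMiscReg() and reads through
 * readMiscReg(); `isa` is a hypothetical GPUISA instance.
 *
 *     isa.writeMiscReg(REG_M0, 0x1000);        // e.g., s_mov_b32 m0, ...
 *     assert(isa.readMiscReg(REG_M0) == 0x1000);
 *     assert(isa.readMiscReg(REG_ZERO) == 0);  // always reads as zero
 */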


@@ -0,0 +1,740 @@
/*
* Copyright (c) 2017-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARCH_VEGA_OPERAND_HH__
#define __ARCH_VEGA_OPERAND_HH__
#include <array>
#include "arch/amdgpu/vega/gpu_registers.hh"
#include "arch/generic/vec_reg.hh"
#include "gpu-compute/scalar_register_file.hh"
#include "gpu-compute/vector_register_file.hh"
#include "gpu-compute/wavefront.hh"
/**
* classes that represent vector/scalar operands in the VEGA ISA. these classes
* wrap the generic vector register type (i.e., src/arch/generic/vec_reg.hh)
* and allow them to be manipulated in ways that are unique to VEGA insts.
*/
namespace VegaISA
{
/**
* convenience traits so we can automatically infer the correct FP type
* without looking at the number of dwords (i.e., to determine if we
* need a float or a double when creating FP constants).
*/
template<typename T> struct OpTraits { typedef float FloatT; };
template<> struct OpTraits<ScalarRegF64> { typedef double FloatT; };
template<> struct OpTraits<ScalarRegU64> { typedef double FloatT; };
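/**
 * Sketch of the intended use (illustrative): a template shared by the
 * 32b and 64b paths can materialize an FP constant at the right width
 * without inspecting dword counts:
 *
 *     template<typename T>
 *     typename OpTraits<T>::FloatT half() { return 0.5; }
 *
 * half<ScalarRegF32>() yields a float, half<ScalarRegF64>() a double.
 */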
class Operand
{
public:
Operand() = delete;
Operand(GPUDynInstPtr gpuDynInst, int opIdx)
: _gpuDynInst(gpuDynInst), _opIdx(opIdx)
{
assert(_gpuDynInst);
assert(_opIdx >= 0);
}
/**
* read from and write to the underlying register(s) that
* this operand is referring to.
*/
virtual void read() = 0;
virtual void write() = 0;
protected:
/**
* instruction object that owns this operand
*/
GPUDynInstPtr _gpuDynInst;
/**
* op selector value for this operand. note that this is not
* the same as the register file index, be it scalar or vector.
* this could refer to inline constants, system regs, or even
* special values.
*/
int _opIdx;
};
template<typename DataType, bool Const, size_t NumDwords>
class ScalarOperand;
template<typename DataType, bool Const,
size_t NumDwords = sizeof(DataType) / sizeof(VecElemU32)>
class VecOperand final : public Operand
{
static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
"Incorrect number of DWORDS for VEGA operand.");
public:
VecOperand() = delete;
VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
: Operand(gpuDynInst, opIdx), scalar(false), absMod(false),
negMod(false), scRegData(gpuDynInst, _opIdx),
vrfData{{ nullptr }}
{
vecReg.zero();
}
~VecOperand()
{
}
/**
* certain vector operands can read from the vrf/srf or constants.
* we use this method to first determine the type of the operand,
* then we read from the appropriate source. if vector we read
* directly from the vrf. if scalar, we read in the data through
* the scalar operand component. this should only be used for VSRC
* operands.
*/
void
readSrc()
{
if (isVectorReg(_opIdx)) {
_opIdx = opSelectorToRegIdx(_opIdx, _gpuDynInst->wavefront()
->reservedScalarRegs);
read();
} else {
readScalar();
}
}
/**
* read from the vrf. this should only be used by vector inst
* source operands that are explicitly vector (i.e., VSRC).
*/
void
read() override
{
assert(_gpuDynInst);
assert(_gpuDynInst->wavefront());
assert(_gpuDynInst->computeUnit());
Wavefront *wf = _gpuDynInst->wavefront();
ComputeUnit *cu = _gpuDynInst->computeUnit();
for (auto i = 0; i < NumDwords; ++i) {
int vgprIdx = cu->registerManager.mapVgpr(wf, _opIdx + i);
vrfData[i] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);
DPRINTF(GPUVRF, "Read v[%d]\n", vgprIdx);
cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
}
if (NumDwords == 1) {
assert(vrfData[0]);
auto vgpr = vecReg.template as<DataType>();
auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
std::memcpy((void*)&vgpr[lane],
(void*)&reg_file_vgpr[lane], sizeof(DataType));
}
} else if (NumDwords == 2) {
assert(vrfData[0]);
assert(vrfData[1]);
auto vgpr = vecReg.template as<VecElemU64>();
auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
VecElemU64 tmp_val(0);
((VecElemU32*)&tmp_val)[0] = reg_file_vgpr0[lane];
((VecElemU32*)&tmp_val)[1] = reg_file_vgpr1[lane];
vgpr[lane] = tmp_val;
}
}
}
/**
* write to the vrf. we maintain a copy of the underlying vector
* reg(s) for this operand (i.e., vrfData/scRegData), as well as a
* temporary vector register representation (i.e., vecReg),
* which allows the execute() methods of instructions
* to easily write their operand data using operator[] regardless of
* their size. after the result is calculated we use write() to write
* the data to the actual register file storage. this allows us to do
* type conversion, etc., in a single call as opposed to doing it
* in each execute() method.
*/
void
write() override
{
assert(_gpuDynInst);
assert(_gpuDynInst->wavefront());
assert(_gpuDynInst->computeUnit());
Wavefront *wf = _gpuDynInst->wavefront();
ComputeUnit *cu = _gpuDynInst->computeUnit();
VectorMask &exec_mask = _gpuDynInst->isLoad()
? _gpuDynInst->exec_mask : wf->execMask();
if (NumDwords == 1) {
int vgprIdx = cu->registerManager.mapVgpr(wf, _opIdx);
vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);
assert(vrfData[0]);
auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
auto vgpr = vecReg.template as<DataType>();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
std::memcpy((void*)&reg_file_vgpr[lane],
(void*)&vgpr[lane], sizeof(DataType));
}
}
DPRINTF(GPUVRF, "Write v[%d]\n", vgprIdx);
cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
} else if (NumDwords == 2) {
int vgprIdx0 = cu->registerManager.mapVgpr(wf, _opIdx);
int vgprIdx1 = cu->registerManager.mapVgpr(wf, _opIdx + 1);
vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx0);
vrfData[1] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx1);
assert(vrfData[0]);
assert(vrfData[1]);
auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
auto vgpr = vecReg.template as<VecElemU64>();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
reg_file_vgpr0[lane] = ((VecElemU32*)&vgpr[lane])[0];
reg_file_vgpr1[lane] = ((VecElemU32*)&vgpr[lane])[1];
}
}
DPRINTF(GPUVRF, "Write v[%d:%d]\n", vgprIdx0, vgprIdx1);
cu->vrf[wf->simdId]->printReg(wf, vgprIdx0);
cu->vrf[wf->simdId]->printReg(wf, vgprIdx1);
}
}
void
negModifier()
{
negMod = true;
}
void
absModifier()
{
absMod = true;
}
/**
* getter [] operator. only enable if this operand is constant
* (i.e., a source operand) and if it can be represented using
* primitive types (i.e., 8b to 64b primitives).
*/
template<bool Condition = (NumDwords == 1 || NumDwords == 2) && Const>
typename std::enable_if<Condition, const DataType>::type
operator[](size_t idx) const
{
assert(idx < NumVecElemPerVecReg);
if (scalar) {
DataType ret_val = scRegData.rawData();
if (absMod) {
assert(std::is_floating_point<DataType>::value);
ret_val = std::fabs(ret_val);
}
if (negMod) {
assert(std::is_floating_point<DataType>::value);
ret_val = -ret_val;
}
return ret_val;
} else {
auto vgpr = vecReg.template as<DataType>();
DataType ret_val = vgpr[idx];
if (absMod) {
assert(std::is_floating_point<DataType>::value);
ret_val = std::fabs(ret_val);
}
if (negMod) {
assert(std::is_floating_point<DataType>::value);
ret_val = -ret_val;
}
return ret_val;
}
}
/**
* setter [] operator. only enable if this operand is non-constant
* (i.e., a destination operand) and if it can be represented using
* primitive types (i.e., 8b to 64b primitives).
*/
template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
typename std::enable_if<Condition, DataType&>::type
operator[](size_t idx)
{
assert(!scalar);
assert(idx < NumVecElemPerVecReg);
return vecReg.template as<DataType>()[idx];
}
private:
/**
* if we determine that this operand is a scalar (reg or constant)
* then we read the scalar data into the scalar operand data member.
*/
void
readScalar()
{
scalar = true;
scRegData.read();
}
using VecRegCont = typename std::conditional<NumDwords == 2,
VecRegContainerU64, typename std::conditional<sizeof(DataType)
== sizeof(VecElemU16), VecRegContainerU16,
typename std::conditional<sizeof(DataType)
== sizeof(VecElemU8), VecRegContainerU8,
VecRegContainerU32>::type>::type>::type;
/**
* whether this operand is a scalar or not.
*/
bool scalar;
/**
* absolute value and negative modifiers. VOP3 instructions
* may indicate that their input/output operands must be
* modified, either by taking the absolute value or negating
* them. these bools indicate which modifier, if any, to use.
*/
bool absMod;
bool negMod;
/**
* this holds all the operand data in a single vector register
* object (i.e., if an operand is 64b, this will hold the data
* from both registers the operand is using).
*/
VecRegCont vecReg;
/**
* for src operands that read scalars (i.e., scalar regs or
* a scalar constant).
*/
ScalarOperand<DataType, Const, NumDwords> scRegData;
/**
* pointers to the underlying registers (i.e., the actual
* registers in the register file).
*/
std::array<VecRegContainerU32*, NumDwords> vrfData;
};
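/**
 * Usage sketch (illustrative; operand and field names are assumed): the
 * lifecycle a typical VOP2 execute() follows with these classes:
 *
 *     ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
 *     ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
 *     VecOperandU32 vdst(gpuDynInst, instData.VDST);
 *     src0.readSrc();    // resolves VGPR vs. scalar/constant source
 *     src1.read();       // VSRC1 is always a VGPR
 *     for (int lane = 0; lane < NumVecElemPerVecReg; ++lane)
 *         if (wf->execMask(lane))
 *             vdst[lane] = src0[lane] + src1[lane];
 *     vdst.write();      // commits enabled lanes to the VRF
 */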
template<typename DataType, bool Const,
size_t NumDwords = sizeof(DataType) / sizeof(ScalarRegU32)>
class ScalarOperand final : public Operand
{
static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
"Incorrect number of DWORDS for VEGA operand.");
public:
ScalarOperand() = delete;
ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
: Operand(gpuDynInst, opIdx)
{
std::memset(srfData.data(), 0, NumDwords * sizeof(ScalarRegU32));
}
~ScalarOperand()
{
}
/**
* we store scalar data in a std::array; however, if we need the
* full operand data we use this method to copy all elements of
* the scalar operand data to a single primitive container. only
* useful for 8b to 64b primitive types, as they are the only types
* that we need to perform computation on.
*/
template<bool Condition = NumDwords == 1 || NumDwords == 2>
typename std::enable_if<Condition, DataType>::type
rawData() const
{
assert(sizeof(DataType) <= sizeof(srfData));
DataType raw_data((DataType)0);
std::memcpy((void*)&raw_data, (void*)srfData.data(),
sizeof(DataType));
return raw_data;
}
void*
rawDataPtr()
{
return (void*)srfData.data();
}
void
read() override
{
Wavefront *wf = _gpuDynInst->wavefront();
ComputeUnit *cu = _gpuDynInst->computeUnit();
if (!isScalarReg(_opIdx)) {
readSpecialVal();
} else {
for (auto i = 0; i < NumDwords; ++i) {
int sgprIdx = regIdx(i);
srfData[i] = cu->srf[wf->simdId]->read(sgprIdx);
DPRINTF(GPUSRF, "Read s[%d]\n", sgprIdx);
cu->srf[wf->simdId]->printReg(wf, sgprIdx);
}
}
}
void
write() override
{
Wavefront *wf = _gpuDynInst->wavefront();
ComputeUnit *cu = _gpuDynInst->computeUnit();
if (!isScalarReg(_opIdx)) {
if (_opIdx == REG_EXEC_LO) {
ScalarRegU64 new_exec_mask_val
= wf->execMask().to_ullong();
if (NumDwords == 1) {
std::memcpy((void*)&new_exec_mask_val,
(void*)srfData.data(), sizeof(VecElemU32));
} else if (NumDwords == 2) {
std::memcpy((void*)&new_exec_mask_val,
(void*)srfData.data(), sizeof(VecElemU64));
} else {
panic("Trying to write more than 2 DWORDS to EXEC\n");
}
VectorMask new_exec_mask(new_exec_mask_val);
wf->execMask() = new_exec_mask;
DPRINTF(GPUSRF, "Write EXEC\n");
DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);
} else if (_opIdx == REG_EXEC_HI) {
/**
* If we're writing only the upper half of the EXEC mask
* this ought to be a single dword operand.
*/
assert(NumDwords == 1);
ScalarRegU32 new_exec_mask_hi_val(0);
ScalarRegU64 new_exec_mask_val
= wf->execMask().to_ullong();
std::memcpy((void*)&new_exec_mask_hi_val,
(void*)srfData.data(), sizeof(new_exec_mask_hi_val));
replaceBits(new_exec_mask_val, 63, 32,
new_exec_mask_hi_val);
VectorMask new_exec_mask(new_exec_mask_val);
wf->execMask() = new_exec_mask;
DPRINTF(GPUSRF, "Write EXEC\n");
DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);
} else {
_gpuDynInst->writeMiscReg(_opIdx, srfData[0]);
}
} else {
for (auto i = 0; i < NumDwords; ++i) {
int sgprIdx = regIdx(i);
auto &sgpr = cu->srf[wf->simdId]->readWriteable(sgprIdx);
if (_gpuDynInst->isLoad()) {
assert(sizeof(DataType) <= sizeof(ScalarRegU64));
sgpr = reinterpret_cast<ScalarRegU32*>(
_gpuDynInst->scalar_data)[i];
} else {
sgpr = srfData[i];
}
DPRINTF(GPUSRF, "Write s[%d]\n", sgprIdx);
cu->srf[wf->simdId]->printReg(wf, sgprIdx);
}
}
}
/**
* bit access to scalar data. primarily used for setting vcc bits.
*/
template<bool Condition = NumDwords == 1 || NumDwords == 2>
typename std::enable_if<Condition, void>::type
setBit(int bit, int bit_val)
{
DataType &sgpr = *((DataType*)srfData.data());
replaceBits(sgpr, bit, bit_val);
}
template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
typename std::enable_if<Condition, ScalarOperand&>::type
operator=(DataType rhs)
{
std::memcpy((void*)srfData.data(), (void*)&rhs, sizeof(DataType));
return *this;
}
private:
/**
* we have determined that we are not reading our scalar operand data
* from the register file, so here we figure out which special value
* we are reading (i.e., float constant, int constant, inline
* constant, or various other system registers (e.g., exec mask)).
*/
void
readSpecialVal()
{
assert(NumDwords == 1 || NumDwords == 2);
switch(_opIdx) {
case REG_EXEC_LO:
{
ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
execMask().to_ullong();
std::memcpy((void*)srfData.data(), (void*)&exec_mask,
sizeof(srfData));
DPRINTF(GPUSRF, "Read EXEC\n");
DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask);
}
break;
case REG_EXEC_HI:
{
/**
* If we're reading only the upper half of the EXEC mask
* this ought to be a single dword operand.
*/
assert(NumDwords == 1);
ScalarRegU64 exec_mask = _gpuDynInst->wavefront()
->execMask().to_ullong();
ScalarRegU32 exec_mask_hi = bits(exec_mask, 63, 32);
std::memcpy((void*)srfData.data(), (void*)&exec_mask_hi,
sizeof(srfData));
DPRINTF(GPUSRF, "Read EXEC_HI\n");
DPRINTF(GPUSRF, "EXEC_HI = %#x\n", exec_mask_hi);
}
break;
case REG_SRC_SWDA:
case REG_SRC_DPP:
case REG_SRC_LITERAL:
assert(NumDwords == 1);
srfData[0] = _gpuDynInst->srcLiteral();
break;
case REG_POS_HALF:
{
typename OpTraits<DataType>::FloatT pos_half = 0.5;
std::memcpy((void*)srfData.data(), (void*)&pos_half,
sizeof(srfData));
}
break;
case REG_NEG_HALF:
{
typename OpTraits<DataType>::FloatT neg_half = -0.5;
std::memcpy((void*)srfData.data(), (void*)&neg_half,
sizeof(srfData));
}
break;
case REG_POS_ONE:
{
typename OpTraits<DataType>::FloatT pos_one = 1.0;
std::memcpy(srfData.data(), &pos_one, sizeof(srfData));
}
break;
case REG_NEG_ONE:
{
typename OpTraits<DataType>::FloatT neg_one = -1.0;
std::memcpy(srfData.data(), &neg_one, sizeof(srfData));
}
break;
case REG_POS_TWO:
{
typename OpTraits<DataType>::FloatT pos_two = 2.0;
std::memcpy(srfData.data(), &pos_two, sizeof(srfData));
}
break;
case REG_NEG_TWO:
{
typename OpTraits<DataType>::FloatT neg_two = -2.0;
std::memcpy(srfData.data(), &neg_two, sizeof(srfData));
}
break;
case REG_POS_FOUR:
{
typename OpTraits<DataType>::FloatT pos_four = 4.0;
std::memcpy(srfData.data(), &pos_four, sizeof(srfData));
}
break;
case REG_NEG_FOUR:
{
typename OpTraits<DataType>::FloatT neg_four = -4.0;
std::memcpy((void*)srfData.data(), (void*)&neg_four,
sizeof(srfData));
}
break;
case REG_PI:
{
assert(sizeof(DataType) == sizeof(ScalarRegF64)
|| sizeof(DataType) == sizeof(ScalarRegF32));
const ScalarRegU32 pi_u32(0x3e22f983UL);
const ScalarRegU64 pi_u64(0x3fc45f306dc9c882ULL);
if (sizeof(DataType) == sizeof(ScalarRegF64)) {
std::memcpy((void*)srfData.data(),
(void*)&pi_u64, sizeof(srfData));
} else {
std::memcpy((void*)srfData.data(),
(void*)&pi_u32, sizeof(srfData));
}
}
break;
default:
{
assert(sizeof(DataType) <= sizeof(srfData));
DataType misc_val(0);
if (isConstVal(_opIdx)) {
misc_val = (DataType)_gpuDynInst
->readConstVal<DataType>(_opIdx);
} else {
misc_val = (DataType)_gpuDynInst->readMiscReg(_opIdx);
}
std::memcpy((void*)srfData.data(), (void*)&misc_val,
sizeof(DataType));
}
}
}
/**
* for scalars we need to do some extra work to figure out how to
* map the op selector to the sgpr idx because some op selectors
* do not map directly to the srf (i.e., vcc/flat_scratch).
*/
int
regIdx(int dword) const
{
Wavefront *wf = _gpuDynInst->wavefront();
ComputeUnit *cu = _gpuDynInst->computeUnit();
int sgprIdx(-1);
if (_opIdx == REG_VCC_LO) {
sgprIdx = cu->registerManager
.mapSgpr(wf, wf->reservedScalarRegs - 2 + dword);
} else if (_opIdx == REG_FLAT_SCRATCH_HI) {
sgprIdx = cu->registerManager
.mapSgpr(wf, wf->reservedScalarRegs - 3 + dword);
} else if (_opIdx == REG_FLAT_SCRATCH_LO) {
assert(NumDwords == 1);
sgprIdx = cu->registerManager
.mapSgpr(wf, wf->reservedScalarRegs - 4 + dword);
} else {
sgprIdx = cu->registerManager.mapSgpr(wf, _opIdx + dword);
}
assert(sgprIdx > -1);
return sgprIdx;
}
/**
* in VEGA each register is represented as a 32b unsigned value,
* but operands may require up to 16 registers, so we store
* all the individual 32b components here. for sub-dword operands
* we still consider them to be 1 dword because the minimum size
* of a register is 1 dword. this class will take care to do the
* proper packing/unpacking of sub-dword operands.
*/
std::array<ScalarRegU32, NumDwords> srfData;
};
// typedefs for the various sizes/types of scalar operands
using ScalarOperandU8 = ScalarOperand<ScalarRegU8, false, 1>;
using ScalarOperandI8 = ScalarOperand<ScalarRegI8, false, 1>;
using ScalarOperandU16 = ScalarOperand<ScalarRegU16, false, 1>;
using ScalarOperandI16 = ScalarOperand<ScalarRegI16, false, 1>;
using ScalarOperandU32 = ScalarOperand<ScalarRegU32, false>;
using ScalarOperandI32 = ScalarOperand<ScalarRegI32, false>;
using ScalarOperandF32 = ScalarOperand<ScalarRegF32, false>;
using ScalarOperandU64 = ScalarOperand<ScalarRegU64, false>;
using ScalarOperandI64 = ScalarOperand<ScalarRegI64, false>;
using ScalarOperandF64 = ScalarOperand<ScalarRegF64, false>;
using ScalarOperandU128 = ScalarOperand<ScalarRegU32, false, 4>;
using ScalarOperandU256 = ScalarOperand<ScalarRegU32, false, 8>;
using ScalarOperandU512 = ScalarOperand<ScalarRegU32, false, 16>;
// non-writeable versions of scalar operands
using ConstScalarOperandU8 = ScalarOperand<ScalarRegU8, true, 1>;
using ConstScalarOperandI8 = ScalarOperand<ScalarRegI8, true, 1>;
using ConstScalarOperandU16 = ScalarOperand<ScalarRegU16, true, 1>;
using ConstScalarOperandI16 = ScalarOperand<ScalarRegI16, true, 1>;
using ConstScalarOperandU32 = ScalarOperand<ScalarRegU32, true>;
using ConstScalarOperandI32 = ScalarOperand<ScalarRegI32, true>;
using ConstScalarOperandF32 = ScalarOperand<ScalarRegF32, true>;
using ConstScalarOperandU64 = ScalarOperand<ScalarRegU64, true>;
using ConstScalarOperandI64 = ScalarOperand<ScalarRegI64, true>;
using ConstScalarOperandF64 = ScalarOperand<ScalarRegF64, true>;
using ConstScalarOperandU128 = ScalarOperand<ScalarRegU32, true, 4>;
using ConstScalarOperandU256 = ScalarOperand<ScalarRegU32, true, 8>;
using ConstScalarOperandU512 = ScalarOperand<ScalarRegU32, true, 16>;
// typedefs for the various sizes/types of vector operands
using VecOperandU8 = VecOperand<VecElemU8, false, 1>;
using VecOperandI8 = VecOperand<VecElemI8, false, 1>;
using VecOperandU16 = VecOperand<VecElemU16, false, 1>;
using VecOperandI16 = VecOperand<VecElemI16, false, 1>;
using VecOperandU32 = VecOperand<VecElemU32, false>;
using VecOperandI32 = VecOperand<VecElemI32, false>;
using VecOperandF32 = VecOperand<VecElemF32, false>;
using VecOperandU64 = VecOperand<VecElemU64, false>;
using VecOperandF64 = VecOperand<VecElemF64, false>;
using VecOperandI64 = VecOperand<VecElemI64, false>;
using VecOperandU96 = VecOperand<VecElemU32, false, 3>;
using VecOperandU128 = VecOperand<VecElemU32, false, 4>;
using VecOperandU256 = VecOperand<VecElemU32, false, 8>;
using VecOperandU512 = VecOperand<VecElemU32, false, 16>;
// non-writeable versions of vector operands
using ConstVecOperandU8 = VecOperand<VecElemU8, true, 1>;
using ConstVecOperandI8 = VecOperand<VecElemI8, true, 1>;
using ConstVecOperandU16 = VecOperand<VecElemU16, true, 1>;
using ConstVecOperandI16 = VecOperand<VecElemI16, true, 1>;
using ConstVecOperandU32 = VecOperand<VecElemU32, true>;
using ConstVecOperandI32 = VecOperand<VecElemI32, true>;
using ConstVecOperandF32 = VecOperand<VecElemF32, true>;
using ConstVecOperandU64 = VecOperand<VecElemU64, true>;
using ConstVecOperandI64 = VecOperand<VecElemI64, true>;
using ConstVecOperandF64 = VecOperand<VecElemF64, true>;
using ConstVecOperandU96 = VecOperand<VecElemU32, true, 3>;
using ConstVecOperandU128 = VecOperand<VecElemU32, true, 4>;
using ConstVecOperandU256 = VecOperand<VecElemU32, true, 8>;
using ConstVecOperandU512 = VecOperand<VecElemU32, true, 16>;
} // namespace VegaISA
#endif // __ARCH_VEGA_OPERAND_HH__


@@ -0,0 +1,245 @@
/*
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "arch/amdgpu/vega/gpu_registers.hh"
namespace VegaISA
{
std::string
opSelectorToRegSym(int idx, int numRegs)
{
std::string reg_sym;
// we have an SGPR
if (idx <= REG_SGPR_MAX) {
if (numRegs > 1)
reg_sym = "s[" + std::to_string(idx) + ":" +
std::to_string(idx + numRegs - 1) + "]";
else
reg_sym = "s" + std::to_string(idx);
return reg_sym;
} else if (idx >= REG_VGPR_MIN && idx <= REG_VGPR_MAX) {
if (numRegs > 1)
reg_sym = "v[" + std::to_string(idx - REG_VGPR_MIN) + ":" +
std::to_string(idx - REG_VGPR_MIN + numRegs - 1) + "]";
else
reg_sym = "v" + std::to_string(idx - REG_VGPR_MIN);
return reg_sym;
} else if (idx >= REG_INT_CONST_POS_MIN &&
idx <= REG_INT_CONST_POS_MAX) {
reg_sym = std::to_string(idx - REG_INT_CONST_POS_MIN + 1);
return reg_sym;
} else if (idx >= REG_INT_CONST_NEG_MIN &&
idx <= REG_INT_CONST_NEG_MAX) {
int inline_val = -1 - (idx - REG_INT_CONST_NEG_MIN);
reg_sym = std::to_string(inline_val);
return reg_sym;
}
switch (idx) {
case REG_FLAT_SCRATCH_LO:
reg_sym = "flat_scratch_lo";
break;
case REG_FLAT_SCRATCH_HI:
reg_sym = "flat_scratch_hi";
break;
case REG_VCC_LO:
reg_sym = "vcc";
break;
case REG_M0:
reg_sym = "m0";
break;
case REG_EXEC_LO:
reg_sym = "exec";
break;
case REG_ZERO:
reg_sym = "0";
break;
case REG_POS_HALF:
reg_sym = "0.5";
break;
case REG_NEG_HALF:
reg_sym = "-0.5";
break;
case REG_POS_ONE:
reg_sym = "1";
break;
case REG_NEG_ONE:
reg_sym = "-1";
break;
case REG_POS_TWO:
reg_sym = "2";
break;
case REG_NEG_TWO:
reg_sym = "-2";
break;
case REG_POS_FOUR:
reg_sym = "4";
break;
case REG_NEG_FOUR:
reg_sym = "-4";
break;
default:
fatal("VEGA ISA instruction has unknown register index %u\n", idx);
break;
}
return reg_sym;
}
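/**
 * Example symbols produced for disassembly (illustrative):
 *
 *     opSelectorToRegSym(3, 1)                -> "s3"
 *     opSelectorToRegSym(3, 2)                -> "s[3:4]"
 *     opSelectorToRegSym(REG_VGPR_MIN + 7, 4) -> "v[7:10]"
 *     opSelectorToRegSym(REG_VCC_LO, 2)       -> "vcc"
 */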
int
opSelectorToRegIdx(int idx, int numScalarRegs)
{
int regIdx = -1;
if (idx <= REG_SGPR_MAX) {
regIdx = idx;
} else if (idx >= REG_VGPR_MIN && idx <= REG_VGPR_MAX) {
regIdx = idx - REG_VGPR_MIN;
} else if (idx == REG_VCC_LO) {
/**
* the VCC register occupies the two highest numbered
* SRF entries. VCC is typically indexed by specifying
* VCC_LO (simply called VCC) in the instruction encoding
* and reading it as a 64b value so we only return the
* index to the lower half of the VCC register.
*
* VCC_LO = s[NUM_SGPRS - 2]
* VCC_HI = s[NUM_SGPRS - 1]
*
*/
regIdx = numScalarRegs - 2;
} else if (idx == REG_VCC_HI) {
regIdx = numScalarRegs - 1;
} else if (idx == REG_FLAT_SCRATCH_LO) {
/**
* the FLAT_SCRATCH register occupies the two SRF entries
* just below VCC. FLAT_SCRATCH is typically indexed by
* specifying FLAT_SCRATCH_LO (simply called FLAT_SCRATCH)
* in the instruction encoding and reading it as a 64b value
* so we only return the index to the lower half of the
* FLAT_SCRATCH register.
*
* FLAT_SCRATCH_LO = s[NUM_SGPRS - 4]
* FLAT_SCRATCH_HI = s[NUM_SGPRS - 3]
*
*/
regIdx = numScalarRegs - 4;
} else if (idx == REG_FLAT_SCRATCH_HI) {
regIdx = numScalarRegs - 3;
} else if (idx == REG_EXEC_LO || idx == REG_EXEC_HI) {
/**
* If the operand is the EXEC mask we just return the op
* selector value indicating it is the EXEC mask, which is
* not part of any RF. Higher-level calls will understand
* that this resolves to a special system register, not an
* index into an RF.
*/
return idx;
}
return regIdx;
}
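/**
 * Worked example (illustrative; assumes 104 addressable scalar regs):
 *
 *     opSelectorToRegIdx(5, 104)                   ->   5  (s5)
 *     opSelectorToRegIdx(REG_VGPR_MIN + 9, 104)    ->   9  (v9)
 *     opSelectorToRegIdx(REG_VCC_LO, 104)          -> 102  (s[102])
 *     opSelectorToRegIdx(REG_FLAT_SCRATCH_LO, 104) -> 100  (s[100])
 */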
bool
isPosConstVal(int opIdx)
{
bool is_pos_const_val = (opIdx >= REG_INT_CONST_POS_MIN
&& opIdx <= REG_INT_CONST_POS_MAX);
return is_pos_const_val;
}
bool
isNegConstVal(int opIdx)
{
bool is_neg_const_val = (opIdx >= REG_INT_CONST_NEG_MIN
&& opIdx <= REG_INT_CONST_NEG_MAX);
return is_neg_const_val;
}
bool
isConstVal(int opIdx)
{
bool is_const_val = isPosConstVal(opIdx) || isNegConstVal(opIdx);
return is_const_val;
}
bool
isLiteral(int opIdx)
{
return opIdx == REG_SRC_LITERAL;
}
bool
isExecMask(int opIdx)
{
return opIdx == REG_EXEC_LO || opIdx == REG_EXEC_HI;
}
bool
isVccReg(int opIdx)
{
return opIdx == REG_VCC_LO || opIdx == REG_VCC_HI;
}
bool
isFlatScratchReg(int opIdx)
{
return opIdx == REG_FLAT_SCRATCH_LO || opIdx == REG_FLAT_SCRATCH_HI;
}
bool
isScalarReg(int opIdx)
{
// FLAT_SCRATCH and VCC are stored in an SGPR pair
if (opIdx <= REG_SGPR_MAX || opIdx == REG_FLAT_SCRATCH_LO ||
opIdx == REG_FLAT_SCRATCH_HI || opIdx == REG_VCC_LO ||
opIdx == REG_VCC_HI) {
return true;
}
return false;
}
bool
isVectorReg(int opIdx)
{
if (opIdx >= REG_VGPR_MIN && opIdx <= REG_VGPR_MAX)
return true;
return false;
}
} // namespace VegaISA