arch-vega: Add Vega ISA as a copy of GCN3
This changeset adds Vega support as a copy of GCN3. Configs have been modified to include both ISAs. Current implementation is not complete and needs modifications to fully comply with the ISA manual: https://developer.amd.com/wp-content/resources/ Vega_Shader_ISA_28July2017.pdf Change-Id: I608aa6747a45594f8e1bd7802da1883cf612168b Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42204 Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
committed by
Matt Sinclair
parent
c7ee47efc9
commit
f7d4ff6ef5
@@ -68,6 +68,12 @@ arch-gcn3:
|
||||
- Matt Poremba <matthew.poremba@amd.com>
|
||||
- Matt Sinclair <sinclair@cs.wisc.edu>
|
||||
|
||||
arch-vega:
|
||||
status: maintained
|
||||
maintainers:
|
||||
- Matt Poremba <matthew.poremba@amd.com>
|
||||
- Matt Sinclair <sinclair@cs.wisc.edu>
|
||||
|
||||
arch-mips:
|
||||
status: orphaned
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ env.SwitchingHeaders(
|
||||
'''),
|
||||
env.subst('${TARGET_ISA}'))
|
||||
|
||||
amdgpu_isa = ['gcn3']
|
||||
amdgpu_isa = ['gcn3', 'vega']
|
||||
|
||||
env.SwitchingHeaders(
|
||||
Split('''
|
||||
|
||||
45
src/arch/amdgpu/vega/SConscript
Normal file
45
src/arch/amdgpu/vega/SConscript
Normal file
@@ -0,0 +1,45 @@
|
||||
# -*- mode:python -*-
|
||||
|
||||
# Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# For use for simulation and test purposes only
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from this
|
||||
# software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
|
||||
Import('*')
|
||||
|
||||
if env['TARGET_GPU_ISA'] == 'vega':
|
||||
Source('decoder.cc')
|
||||
Source('insts/gpu_static_inst.cc')
|
||||
Source('insts/instructions.cc')
|
||||
Source('insts/op_encodings.cc')
|
||||
Source('isa.cc')
|
||||
Source('registers.cc')
|
||||
DebugFlag('VEGA', 'Debug flag for VEGA GPU ISA')
|
||||
36
src/arch/amdgpu/vega/SConsopts
Normal file
36
src/arch/amdgpu/vega/SConsopts
Normal file
@@ -0,0 +1,36 @@
|
||||
# -*- mode:python -*-
|
||||
|
||||
# Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# For use for simulation and test purposes only
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from this
|
||||
# software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Import('*')
|
||||
|
||||
all_gpu_isa_list.append('vega')
|
||||
10817
src/arch/amdgpu/vega/decoder.cc
Normal file
10817
src/arch/amdgpu/vega/decoder.cc
Normal file
File diff suppressed because it is too large
Load Diff
1649
src/arch/amdgpu/vega/gpu_decoder.hh
Normal file
1649
src/arch/amdgpu/vega/gpu_decoder.hh
Normal file
File diff suppressed because it is too large
Load Diff
103
src/arch/amdgpu/vega/gpu_isa.hh
Normal file
103
src/arch/amdgpu/vega/gpu_isa.hh
Normal file
@@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_VEGA_GPU_ISA_HH__
|
||||
#define __ARCH_VEGA_GPU_ISA_HH__
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
#include "arch/amdgpu/vega/gpu_registers.hh"
|
||||
#include "gpu-compute/dispatcher.hh"
|
||||
#include "gpu-compute/hsa_queue_entry.hh"
|
||||
#include "gpu-compute/misc.hh"
|
||||
|
||||
class Wavefront;
|
||||
|
||||
namespace VegaISA
|
||||
{
|
||||
class GPUISA
|
||||
{
|
||||
public:
|
||||
GPUISA(Wavefront &wf);
|
||||
|
||||
template<typename T> T
|
||||
readConstVal(int opIdx) const
|
||||
{
|
||||
panic_if(!std::is_integral<T>::value, "Constant values must "
|
||||
"be an integer.\n");
|
||||
T val(0);
|
||||
|
||||
if (isPosConstVal(opIdx)) {
|
||||
val = (T)readPosConstReg(opIdx);
|
||||
}
|
||||
|
||||
if (isNegConstVal(opIdx)) {
|
||||
val = (T)readNegConstReg(opIdx);
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
ScalarRegU32 readMiscReg(int opIdx) const;
|
||||
void writeMiscReg(int opIdx, ScalarRegU32 operandVal);
|
||||
bool hasScalarUnit() const { return true; }
|
||||
void advancePC(GPUDynInstPtr gpuDynInst);
|
||||
|
||||
private:
|
||||
ScalarRegU32 readPosConstReg(int opIdx) const
|
||||
{
|
||||
return posConstRegs[opIdx - REG_INT_CONST_POS_MIN];
|
||||
}
|
||||
|
||||
ScalarRegI32 readNegConstReg(int opIdx) const
|
||||
{
|
||||
return negConstRegs[opIdx - REG_INT_CONST_NEG_MIN];
|
||||
}
|
||||
|
||||
static const std::array<const ScalarRegU32, NumPosConstRegs>
|
||||
posConstRegs;
|
||||
static const std::array<const ScalarRegI32, NumNegConstRegs>
|
||||
negConstRegs;
|
||||
|
||||
// parent wavefront
|
||||
Wavefront &wavefront;
|
||||
|
||||
// shader status bits
|
||||
StatusReg statusReg;
|
||||
// memory descriptor reg
|
||||
ScalarRegU32 m0;
|
||||
};
|
||||
} // namespace VegaISA
|
||||
|
||||
#endif // __ARCH_VEGA_GPU_ISA_HH__
|
||||
186
src/arch/amdgpu/vega/gpu_mem_helpers.hh
Normal file
186
src/arch/amdgpu/vega/gpu_mem_helpers.hh
Normal file
@@ -0,0 +1,186 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_VEGA_GPU_MEM_HELPERS_HH__
|
||||
#define __ARCH_VEGA_GPU_MEM_HELPERS_HH__
|
||||
|
||||
#include "arch/amdgpu/vega/insts/gpu_static_inst.hh"
|
||||
#include "arch/amdgpu/vega/insts/op_encodings.hh"
|
||||
#include "debug/GPUMem.hh"
|
||||
#include "gpu-compute/gpu_dyn_inst.hh"
|
||||
|
||||
/**
|
||||
* Helper function for instructions declared in op_encodings. This function
|
||||
* takes in all of the arguments for a given memory request we are trying to
|
||||
* initialize, then submits the request or requests depending on if the
|
||||
* original request is aligned or unaligned.
|
||||
*/
|
||||
template<typename T, int N>
|
||||
inline void
|
||||
initMemReqHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type,
|
||||
bool is_atomic=false)
|
||||
{
|
||||
// local variables
|
||||
int req_size = N * sizeof(T);
|
||||
int block_size = gpuDynInst->computeUnit()->cacheLineSize();
|
||||
Addr vaddr = 0, split_addr = 0;
|
||||
bool misaligned_acc = false;
|
||||
RequestPtr req = nullptr, req1 = nullptr, req2 = nullptr;
|
||||
PacketPtr pkt = nullptr, pkt1 = nullptr, pkt2 = nullptr;
|
||||
|
||||
gpuDynInst->resetEntireStatusVector();
|
||||
for (int lane = 0; lane < VegaISA::NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
vaddr = gpuDynInst->addr[lane];
|
||||
|
||||
/**
|
||||
* the base address of the cache line where the the last
|
||||
* byte of the request will be stored.
|
||||
*/
|
||||
split_addr = roundDown(vaddr + req_size - 1, block_size);
|
||||
|
||||
assert(split_addr <= vaddr || split_addr - vaddr < block_size);
|
||||
/**
|
||||
* if the base cache line address of the last byte is
|
||||
* greater than the address of the first byte then we have
|
||||
* a misaligned access.
|
||||
*/
|
||||
misaligned_acc = split_addr > vaddr;
|
||||
|
||||
if (is_atomic) {
|
||||
// make sure request is word aligned
|
||||
assert((vaddr & 0x3) == 0);
|
||||
|
||||
// a given lane's atomic can't cross cache lines
|
||||
assert(!misaligned_acc);
|
||||
|
||||
req = std::make_shared<Request>(0, vaddr, sizeof(T), 0,
|
||||
gpuDynInst->computeUnit()->masterId(), 0,
|
||||
gpuDynInst->wfDynId,
|
||||
gpuDynInst->makeAtomicOpFunctor<T>(
|
||||
&(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
|
||||
&(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]));
|
||||
} else {
|
||||
req = std::make_shared<Request>(0, vaddr, req_size, 0,
|
||||
gpuDynInst->computeUnit()->masterId(), 0,
|
||||
gpuDynInst->wfDynId);
|
||||
}
|
||||
|
||||
if (misaligned_acc) {
|
||||
gpuDynInst->setStatusVector(lane, 2);
|
||||
req->splitOnVaddr(split_addr, req1, req2);
|
||||
gpuDynInst->setRequestFlags(req1);
|
||||
gpuDynInst->setRequestFlags(req2);
|
||||
pkt1 = new Packet(req1, mem_req_type);
|
||||
pkt2 = new Packet(req2, mem_req_type);
|
||||
pkt1->dataStatic(&(reinterpret_cast<T*>(
|
||||
gpuDynInst->d_data))[lane * N]);
|
||||
pkt2->dataStatic(&(reinterpret_cast<T*>(
|
||||
gpuDynInst->d_data))[lane * N + req1->getSize()]);
|
||||
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory "
|
||||
"request for %#x\n", gpuDynInst->cu_id,
|
||||
gpuDynInst->simdId, gpuDynInst->wfSlotId, lane,
|
||||
split_addr);
|
||||
gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1);
|
||||
gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2);
|
||||
} else {
|
||||
gpuDynInst->setStatusVector(lane, 1);
|
||||
gpuDynInst->setRequestFlags(req);
|
||||
pkt = new Packet(req, mem_req_type);
|
||||
pkt->dataStatic(&(reinterpret_cast<T*>(
|
||||
gpuDynInst->d_data))[lane * N]);
|
||||
gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt);
|
||||
}
|
||||
} else { // if lane is not active, then no pending requests
|
||||
gpuDynInst->setStatusVector(lane, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function for scalar instructions declared in op_encodings. This
|
||||
* function takes in all of the arguments for a given memory request we are
|
||||
* trying to initialize, then submits the request or requests depending on if
|
||||
* the original request is aligned or unaligned.
|
||||
*/
|
||||
template<typename T, int N>
|
||||
inline void
|
||||
initMemReqScalarHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type)
|
||||
{
|
||||
int req_size = N * sizeof(T);
|
||||
int block_size = gpuDynInst->computeUnit()->cacheLineSize();
|
||||
Addr vaddr = gpuDynInst->scalarAddr;
|
||||
|
||||
/**
|
||||
* the base address of the cache line where the the last byte of
|
||||
* the request will be stored.
|
||||
*/
|
||||
Addr split_addr = roundDown(vaddr + req_size - 1, block_size);
|
||||
|
||||
assert(split_addr <= vaddr || split_addr - vaddr < block_size);
|
||||
/**
|
||||
* if the base cache line address of the last byte is greater
|
||||
* than the address of the first byte then we have a misaligned
|
||||
* access.
|
||||
*/
|
||||
bool misaligned_acc = split_addr > vaddr;
|
||||
|
||||
RequestPtr req = std::make_shared<Request>(0, vaddr, req_size, 0,
|
||||
gpuDynInst->computeUnit()->masterId(), 0,
|
||||
gpuDynInst->wfDynId);
|
||||
|
||||
if (misaligned_acc) {
|
||||
RequestPtr req1, req2;
|
||||
req->splitOnVaddr(split_addr, req1, req2);
|
||||
gpuDynInst->numScalarReqs = 2;
|
||||
gpuDynInst->setRequestFlags(req1);
|
||||
gpuDynInst->setRequestFlags(req2);
|
||||
PacketPtr pkt1 = new Packet(req1, mem_req_type);
|
||||
PacketPtr pkt2 = new Packet(req2, mem_req_type);
|
||||
pkt1->dataStatic(gpuDynInst->scalar_data);
|
||||
pkt2->dataStatic(gpuDynInst->scalar_data + req1->getSize());
|
||||
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: unaligned scalar memory request for"
|
||||
" %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId,
|
||||
gpuDynInst->wfSlotId, split_addr);
|
||||
gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
|
||||
gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
|
||||
} else {
|
||||
gpuDynInst->numScalarReqs = 1;
|
||||
gpuDynInst->setRequestFlags(req);
|
||||
PacketPtr pkt = new Packet(req, mem_req_type);
|
||||
pkt->dataStatic(gpuDynInst->scalar_data);
|
||||
gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // __ARCH_VEGA_GPU_MEM_HELPERS_HH__
|
||||
256
src/arch/amdgpu/vega/gpu_registers.hh
Normal file
256
src/arch/amdgpu/vega/gpu_registers.hh
Normal file
@@ -0,0 +1,256 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_VEGA_REGISTERS_HH__
|
||||
#define __ARCH_VEGA_REGISTERS_HH__
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include "arch/generic/vec_reg.hh"
|
||||
#include "base/intmath.hh"
|
||||
#include "base/logging.hh"
|
||||
|
||||
namespace VegaISA
|
||||
{
|
||||
enum OpSelector : int
|
||||
{
|
||||
REG_SGPR_MIN = 0,
|
||||
REG_SGPR_MAX = 101,
|
||||
REG_FLAT_SCRATCH_LO = 102,
|
||||
REG_FLAT_SCRATCH_HI = 103,
|
||||
REG_XNACK_MASK_LO = 104,
|
||||
REG_XNACK_MASK_HI = 105,
|
||||
REG_VCC_LO = 106,
|
||||
REG_VCC_HI = 107,
|
||||
REG_TBA_LO = 108,
|
||||
REG_TBA_HI = 109,
|
||||
REG_TMA_LO = 110,
|
||||
REG_TMA_HI = 111,
|
||||
REG_TTMP_0 = 112,
|
||||
REG_TTMP_1 = 113,
|
||||
REG_TTMP_2 = 114,
|
||||
REG_TTMP_3 = 115,
|
||||
REG_TTMP_4 = 116,
|
||||
REG_TTMP_5 = 117,
|
||||
REG_TTMP_6 = 118,
|
||||
REG_TTMP_7 = 119,
|
||||
REG_TTMP_8 = 120,
|
||||
REG_TTMP_9 = 121,
|
||||
REG_TTMP_10 = 122,
|
||||
REG_TTMP_11 = 123,
|
||||
REG_M0 = 124,
|
||||
REG_RESERVED_1 = 125,
|
||||
REG_EXEC_LO = 126,
|
||||
REG_EXEC_HI = 127,
|
||||
REG_ZERO = 128,
|
||||
REG_INT_CONST_POS_MIN = 129,
|
||||
REG_INT_CONST_POS_MAX = 192,
|
||||
REG_INT_CONST_NEG_MIN = 193,
|
||||
REG_INT_CONST_NEG_MAX = 208,
|
||||
REG_RESERVED_2 = 209,
|
||||
REG_RESERVED_3 = 210,
|
||||
REG_RESERVED_4 = 211,
|
||||
REG_RESERVED_5 = 212,
|
||||
REG_RESERVED_6 = 213,
|
||||
REG_RESERVED_7 = 214,
|
||||
REG_RESERVED_8 = 215,
|
||||
REG_RESERVED_9 = 216,
|
||||
REG_RESERVED_10 = 217,
|
||||
REG_RESERVED_11 = 218,
|
||||
REG_RESERVED_12 = 219,
|
||||
REG_RESERVED_13 = 220,
|
||||
REG_RESERVED_14 = 221,
|
||||
REG_RESERVED_15 = 222,
|
||||
REG_RESERVED_16 = 223,
|
||||
REG_RESERVED_17 = 224,
|
||||
REG_RESERVED_18 = 225,
|
||||
REG_RESERVED_19 = 226,
|
||||
REG_RESERVED_20 = 227,
|
||||
REG_RESERVED_21 = 228,
|
||||
REG_RESERVED_22 = 229,
|
||||
REG_RESERVED_23 = 230,
|
||||
REG_RESERVED_24 = 231,
|
||||
REG_RESERVED_25 = 232,
|
||||
REG_RESERVED_26 = 233,
|
||||
REG_RESERVED_27 = 234,
|
||||
REG_RESERVED_28 = 235,
|
||||
REG_RESERVED_29 = 236,
|
||||
REG_RESERVED_30 = 237,
|
||||
REG_RESERVED_31 = 238,
|
||||
REG_RESERVED_32 = 239,
|
||||
REG_POS_HALF = 240,
|
||||
REG_NEG_HALF = 241,
|
||||
REG_POS_ONE = 242,
|
||||
REG_NEG_ONE = 243,
|
||||
REG_POS_TWO = 244,
|
||||
REG_NEG_TWO = 245,
|
||||
REG_POS_FOUR = 246,
|
||||
REG_NEG_FOUR = 247,
|
||||
REG_PI = 248,
|
||||
/* NOTE: SDWA and SWDA both refer to sub d-word addressing */
|
||||
REG_SRC_SWDA = 249,
|
||||
REG_SRC_DPP = 250,
|
||||
REG_VCCZ = 251,
|
||||
REG_EXECZ = 252,
|
||||
REG_SCC = 253,
|
||||
REG_LDS_DIRECT = 254,
|
||||
REG_SRC_LITERAL = 255,
|
||||
REG_VGPR_MIN = 256,
|
||||
REG_VGPR_MAX = 511
|
||||
};
|
||||
|
||||
constexpr size_t MaxOperandDwords(16);
|
||||
const int NumVecElemPerVecReg(64);
|
||||
// op selector values 129 - 192 correspond to const values 1 - 64
|
||||
const int NumPosConstRegs = REG_INT_CONST_POS_MAX
|
||||
- REG_INT_CONST_POS_MIN + 1;
|
||||
// op selector values 193 - 208 correspond to const values -1 - 16
|
||||
const int NumNegConstRegs = REG_INT_CONST_NEG_MAX
|
||||
- REG_INT_CONST_NEG_MIN + 1;
|
||||
const int BITS_PER_BYTE = 8;
|
||||
const int BITS_PER_WORD = 16;
|
||||
const int MSB_PER_BYTE = (BITS_PER_BYTE - 1);
|
||||
const int MSB_PER_WORD = (BITS_PER_WORD - 1);
|
||||
|
||||
// typedefs for the various sizes/types of scalar regs
|
||||
typedef uint8_t ScalarRegU8;
|
||||
typedef int8_t ScalarRegI8;
|
||||
typedef uint16_t ScalarRegU16;
|
||||
typedef int16_t ScalarRegI16;
|
||||
typedef uint32_t ScalarRegU32;
|
||||
typedef int32_t ScalarRegI32;
|
||||
typedef float ScalarRegF32;
|
||||
typedef uint64_t ScalarRegU64;
|
||||
typedef int64_t ScalarRegI64;
|
||||
typedef double ScalarRegF64;
|
||||
|
||||
// typedefs for the various sizes/types of vector reg elements
|
||||
typedef uint8_t VecElemU8;
|
||||
typedef int8_t VecElemI8;
|
||||
typedef uint16_t VecElemU16;
|
||||
typedef int16_t VecElemI16;
|
||||
typedef uint32_t VecElemU32;
|
||||
typedef int32_t VecElemI32;
|
||||
typedef float VecElemF32;
|
||||
typedef uint64_t VecElemU64;
|
||||
typedef int64_t VecElemI64;
|
||||
typedef double VecElemF64;
|
||||
|
||||
const int DWORDSize = sizeof(VecElemU32);
|
||||
/**
|
||||
* Size of a single-precision register in DWORDs.
|
||||
*/
|
||||
const int RegSizeDWORDs = sizeof(VecElemU32) / DWORDSize;
|
||||
|
||||
// typedefs for the various sizes/types of vector regs
|
||||
using VecRegU8 = ::VecRegT<VecElemU8, NumVecElemPerVecReg, false>;
|
||||
using VecRegI8 = ::VecRegT<VecElemI8, NumVecElemPerVecReg, false>;
|
||||
using VecRegU16 = ::VecRegT<VecElemU16, NumVecElemPerVecReg, false>;
|
||||
using VecRegI16 = ::VecRegT<VecElemI16, NumVecElemPerVecReg, false>;
|
||||
using VecRegU32 = ::VecRegT<VecElemU32, NumVecElemPerVecReg, false>;
|
||||
using VecRegI32 = ::VecRegT<VecElemI32, NumVecElemPerVecReg, false>;
|
||||
using VecRegF32 = ::VecRegT<VecElemF32, NumVecElemPerVecReg, false>;
|
||||
using VecRegU64 = ::VecRegT<VecElemU64, NumVecElemPerVecReg, false>;
|
||||
using VecRegI64 = ::VecRegT<VecElemI64, NumVecElemPerVecReg, false>;
|
||||
using VecRegF64 = ::VecRegT<VecElemF64, NumVecElemPerVecReg, false>;
|
||||
// non-writeable versions of vector regs
|
||||
using ConstVecRegU8 = ::VecRegT<VecElemU8, NumVecElemPerVecReg, true>;
|
||||
using ConstVecRegI8 = ::VecRegT<VecElemI8, NumVecElemPerVecReg, true>;
|
||||
using ConstVecRegU16 = ::VecRegT<VecElemU16, NumVecElemPerVecReg, true>;
|
||||
using ConstVecRegI16 = ::VecRegT<VecElemI16, NumVecElemPerVecReg, true>;
|
||||
using ConstVecRegU32 = ::VecRegT<VecElemU32, NumVecElemPerVecReg, true>;
|
||||
using ConstVecRegI32 = ::VecRegT<VecElemI32, NumVecElemPerVecReg, true>;
|
||||
using ConstVecRegF32 = ::VecRegT<VecElemF32, NumVecElemPerVecReg, true>;
|
||||
using ConstVecRegU64 = ::VecRegT<VecElemU64, NumVecElemPerVecReg, true>;
|
||||
using ConstVecRegI64 = ::VecRegT<VecElemI64, NumVecElemPerVecReg, true>;
|
||||
using ConstVecRegF64 = ::VecRegT<VecElemF64, NumVecElemPerVecReg, true>;
|
||||
|
||||
using VecRegContainerU8 = VecRegU8::Container;
|
||||
using VecRegContainerU16 = VecRegU16::Container;
|
||||
using VecRegContainerU32 = VecRegU32::Container;
|
||||
using VecRegContainerU64 = VecRegU64::Container;
|
||||
|
||||
struct StatusReg
|
||||
{
|
||||
StatusReg() : SCC(0), SPI_PRIO(0), USER_PRIO(0), PRIV(0), TRAP_EN(0),
|
||||
TTRACE_EN(0), EXPORT_RDY(0), EXECZ(0), VCCZ(0), IN_TG(0),
|
||||
IN_BARRIER(0), HALT(0), TRAP(0), TTRACE_CU_EN(0), VALID(0),
|
||||
ECC_ERR(0), SKIP_EXPORT(0), PERF_EN(0), COND_DBG_USER(0),
|
||||
COND_DBG_SYS(0), ALLOW_REPLAY(0), INSTRUCTION_ATC(0), RESERVED(0),
|
||||
MUST_EXPORT(0), RESERVED_1(0)
|
||||
{
|
||||
}
|
||||
|
||||
uint32_t SCC : 1;
|
||||
uint32_t SPI_PRIO : 2;
|
||||
uint32_t USER_PRIO : 2;
|
||||
uint32_t PRIV : 1;
|
||||
uint32_t TRAP_EN : 1;
|
||||
uint32_t TTRACE_EN : 1;
|
||||
uint32_t EXPORT_RDY : 1;
|
||||
uint32_t EXECZ : 1;
|
||||
uint32_t VCCZ : 1;
|
||||
uint32_t IN_TG : 1;
|
||||
uint32_t IN_BARRIER : 1;
|
||||
uint32_t HALT : 1;
|
||||
uint32_t TRAP : 1;
|
||||
uint32_t TTRACE_CU_EN : 1;
|
||||
uint32_t VALID : 1;
|
||||
uint32_t ECC_ERR : 1;
|
||||
uint32_t SKIP_EXPORT : 1;
|
||||
uint32_t PERF_EN : 1;
|
||||
uint32_t COND_DBG_USER : 1;
|
||||
uint32_t COND_DBG_SYS : 1;
|
||||
uint32_t ALLOW_REPLAY : 1;
|
||||
uint32_t INSTRUCTION_ATC : 1;
|
||||
uint32_t RESERVED : 3;
|
||||
uint32_t MUST_EXPORT : 1;
|
||||
uint32_t RESERVED_1 : 4;
|
||||
};
|
||||
|
||||
std::string opSelectorToRegSym(int opIdx, int numRegs=0);
|
||||
int opSelectorToRegIdx(int opIdx, int numScalarRegs);
|
||||
bool isPosConstVal(int opIdx);
|
||||
bool isNegConstVal(int opIdx);
|
||||
bool isConstVal(int opIdx);
|
||||
bool isLiteral(int opIdx);
|
||||
bool isScalarReg(int opIdx);
|
||||
bool isVectorReg(int opIdx);
|
||||
bool isFlatScratchReg(int opIdx);
|
||||
bool isExecMask(int opIdx);
|
||||
bool isVccReg(int opIdx);
|
||||
} // namespace VegaISA
|
||||
|
||||
#endif // __ARCH_VEGA_REGISTERS_HH__
|
||||
64
src/arch/amdgpu/vega/gpu_types.hh
Normal file
64
src/arch/amdgpu/vega/gpu_types.hh
Normal file
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_VEGA_GPU_TYPES_HH__
|
||||
#define __ARCH_VEGA_GPU_TYPES_HH__
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace VegaISA
|
||||
{
|
||||
union InstFormat;
|
||||
|
||||
/**
|
||||
* used to represnt a GPU inst in its raw format. VEGA
|
||||
* instructions may be 32b or 64b, therefore we represent
|
||||
* a raw inst with 64b to ensure that all of its inst data,
|
||||
* including potential immediate values, may be represented
|
||||
* in the worst case.
|
||||
*/
|
||||
typedef uint64_t RawMachInst;
|
||||
|
||||
/**
|
||||
* used to represent the encoding of a VEGA inst. each portion
|
||||
* of a VEGA inst must be 1 DWORD (32b), so we use a pointer
|
||||
* to InstFormat type (which is 32b). for the case in which we
|
||||
* need multiple DWORDS to represnt a single inst, this pointer
|
||||
* essentialy acts as an array of the DWORDs needed to represent
|
||||
* the entire inst encoding.
|
||||
*/
|
||||
typedef InstFormat *MachInst;
|
||||
|
||||
} // namespace VegaISA
|
||||
|
||||
#endif // __ARCH_VEGA_GPU_TYPES_HH__
|
||||
58
src/arch/amdgpu/vega/insts/gpu_static_inst.cc
Normal file
58
src/arch/amdgpu/vega/insts/gpu_static_inst.cc
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "arch/amdgpu/vega/insts/gpu_static_inst.hh"
|
||||
|
||||
#include "arch/amdgpu/vega/gpu_decoder.hh"
|
||||
#include "arch/amdgpu/vega/insts/instructions.hh"
|
||||
#include "debug/GPUExec.hh"
|
||||
#include "gpu-compute/flexible_pool_manager.hh"
|
||||
#include "gpu-compute/shader.hh"
|
||||
|
||||
namespace VegaISA
|
||||
{
|
||||
VEGAGPUStaticInst::VEGAGPUStaticInst(const std::string &opcode)
|
||||
: GPUStaticInst(opcode), _srcLiteral(0)
|
||||
{
|
||||
}
|
||||
|
||||
VEGAGPUStaticInst::~VEGAGPUStaticInst()
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
VEGAGPUStaticInst::panicUnimplemented() const
|
||||
{
|
||||
fatal("Encountered unimplemented VEGA instruction: %s\n", _opcode);
|
||||
}
|
||||
} // namespace VegaISA
|
||||
94
src/arch/amdgpu/vega/insts/gpu_static_inst.hh
Normal file
94
src/arch/amdgpu/vega/insts/gpu_static_inst.hh
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_VEGA_INSTS_GPU_STATIC_INST_HH__
|
||||
#define __ARCH_VEGA_INSTS_GPU_STATIC_INST_HH__
|
||||
|
||||
#include "arch/amdgpu/vega/gpu_registers.hh"
|
||||
#include "arch/amdgpu/vega/operand.hh"
|
||||
#include "gpu-compute/gpu_static_inst.hh"
|
||||
#include "gpu-compute/scalar_register_file.hh"
|
||||
#include "gpu-compute/vector_register_file.hh"
|
||||
#include "gpu-compute/wavefront.hh"
|
||||
|
||||
namespace VegaISA
|
||||
{
|
||||
class VEGAGPUStaticInst : public GPUStaticInst
|
||||
{
|
||||
public:
|
||||
VEGAGPUStaticInst(const std::string &opcode);
|
||||
~VEGAGPUStaticInst();
|
||||
|
||||
void generateDisassembly() override { disassembly = _opcode; }
|
||||
|
||||
bool
|
||||
isFlatScratchRegister(int opIdx) override
|
||||
{
|
||||
return isFlatScratchReg(opIdx);
|
||||
}
|
||||
|
||||
bool isScalarRegister(int opIdx) override { return false; }
|
||||
bool isVectorRegister(int opIdx) override { return false; }
|
||||
bool isSrcOperand(int opIdx) override { return false; }
|
||||
bool isDstOperand(int opIdx) override { return false; }
|
||||
int getOperandSize(int opIdx) override { return 0; }
|
||||
|
||||
int
|
||||
getRegisterIndex(int opIdx, int num_scalar_regs) override
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of tokens needed by the coalescer. In VEGA there
|
||||
* is generally one packet per memory request per lane generated. In
|
||||
* HSAIL, the number of dest operands is used for loads and src
|
||||
* operands for stores. This method should be overriden on a per-inst
|
||||
* basis when this value differs.
|
||||
*/
|
||||
int coalescerTokenCount() const override { return 1; }
|
||||
ScalarRegU32 srcLiteral() const override { return _srcLiteral; }
|
||||
|
||||
protected:
|
||||
void panicUnimplemented() const;
|
||||
|
||||
/**
|
||||
* if the instruction has a src literal - an immediate
|
||||
* value that is part of the instruction stream - we
|
||||
* store that here
|
||||
*/
|
||||
ScalarRegU32 _srcLiteral;
|
||||
}; // class VEGAGPUStaticInst
|
||||
|
||||
} // namespace VegaISA
|
||||
#endif //__ARCH_VEGA_INSTS_GPU_STATIC_INST_HH__
|
||||
894
src/arch/amdgpu/vega/insts/inst_util.hh
Normal file
894
src/arch/amdgpu/vega/insts/inst_util.hh
Normal file
@@ -0,0 +1,894 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_VEGA_INSTS_INST_UTIL_HH__
|
||||
#define __ARCH_VEGA_INSTS_INST_UTIL_HH__
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "arch/amdgpu/vega/gpu_registers.hh"
|
||||
|
||||
// values for SDWA select operations
|
||||
enum SDWASelVals : int
|
||||
{
|
||||
SDWA_BYTE_0 = 0, /* select data[7:0] */
|
||||
SDWA_BYTE_1 = 1, /* select data[15:8] */
|
||||
SDWA_BYTE_2 = 2, /* select data[23:16] */
|
||||
SDWA_BYTE_3 = 3, /* select data[31:24] */
|
||||
SDWA_WORD_0 = 4, /* select data[15:0] */
|
||||
SDWA_WORD_1 = 5, /* select data[31:16] */
|
||||
SDWA_DWORD = 6 /* select data[31:0] */
|
||||
};
|
||||
|
||||
// values for format of destination bits for SDWA operations
|
||||
enum SDWADstVals : int
|
||||
{
|
||||
SDWA_UNUSED_PAD = 0, /* Pad all unused bits with 0 */
|
||||
SDWA_UNUSED_SEXT = 1, /* Sign-extend upper bits; pad lower bits w/ 0 */
|
||||
SDWA_UNUSED_PRESERVE = 2 /* select data[31:0] */
|
||||
};
|
||||
|
||||
// values for DPP operations
|
||||
enum SqDPPVals : int
|
||||
{
|
||||
SQ_DPP_QUAD_PERM_MAX = 0xFF,
|
||||
SQ_DPP_RESERVED = 0x100,
|
||||
SQ_DPP_ROW_SL1 = 0x101,
|
||||
SQ_DPP_ROW_SL15 = 0x10F,
|
||||
SQ_DPP_ROW_SR1 = 0x111,
|
||||
SQ_DPP_ROW_SR15 = 0x11F,
|
||||
SQ_DPP_ROW_RR1 = 0x121,
|
||||
SQ_DPP_ROW_RR15 = 0x12F,
|
||||
SQ_DPP_WF_SL1 = 0x130,
|
||||
SQ_DPP_WF_RL1 = 0x134,
|
||||
SQ_DPP_WF_SR1 = 0x138,
|
||||
SQ_DPP_WF_RR1 = 0x13C,
|
||||
SQ_DPP_ROW_MIRROR = 0x140,
|
||||
SQ_DPP_ROW_HALF_MIRROR = 0x141,
|
||||
SQ_DPP_ROW_BCAST15 = 0x142,
|
||||
SQ_DPP_ROW_BCAST31 = 0x143
|
||||
};
|
||||
static const int ROW_SIZE = 16; /* 16 registers per row */
|
||||
static const int NUM_BANKS = 4; /* 64 registers, 16/bank */
|
||||
|
||||
namespace VegaISA
|
||||
{
|
||||
template<typename T>
|
||||
inline T
|
||||
wholeQuadMode(T val)
|
||||
{
|
||||
T wqm = 0;
|
||||
T mask = 0xF;
|
||||
|
||||
for (T bits = val; mask != 0; mask <<= 4)
|
||||
if ((bits & mask) != 0)
|
||||
wqm |= mask;
|
||||
|
||||
return wqm;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T
|
||||
quadMask(T val)
|
||||
{
|
||||
T qmsk = 0;
|
||||
T mask = 0xF;
|
||||
T qbit = 0x1;
|
||||
|
||||
for (T bits = val; mask != 0; mask <<= 4, qbit <<= 1) {
|
||||
if (bits & mask) {
|
||||
qmsk |= qbit;
|
||||
}
|
||||
}
|
||||
|
||||
return qmsk;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline ScalarRegI32
|
||||
countZeroBits(T val)
|
||||
{
|
||||
ScalarRegI32 num_zeros
|
||||
= std::numeric_limits<T>::digits - popCount(val);
|
||||
|
||||
return num_zeros;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline ScalarRegI32
|
||||
findFirstZero(T val)
|
||||
{
|
||||
if (val == ~T(0)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return findLsbSet(~val);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline ScalarRegI32
|
||||
findFirstOne(T val)
|
||||
{
|
||||
if (!val) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return findLsbSet(val);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline ScalarRegI32
|
||||
findFirstOneMsb(T val)
|
||||
{
|
||||
if (!val) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return findMsbSet(val);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline ScalarRegI32
|
||||
countZeroBitsMsb(T val)
|
||||
{
|
||||
if (!val) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return std::numeric_limits<T>::digits - 1 - findMsbSet(val);
|
||||
}
|
||||
|
||||
inline ScalarRegI32
|
||||
firstOppositeSignBit(ScalarRegI32 val)
|
||||
{
|
||||
bool found(false);
|
||||
bool sign_bit = (val & 0x80000000) != 0;
|
||||
ScalarRegU32 tmp_val(0);
|
||||
int count(0);
|
||||
|
||||
if (!val || val == -1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < std::numeric_limits<ScalarRegU32>::digits; ++i) {
|
||||
tmp_val = val & (0x80000000 >> i);
|
||||
|
||||
if (!sign_bit) {
|
||||
if (tmp_val) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (!tmp_val) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
++count;
|
||||
}
|
||||
|
||||
if (found) {
|
||||
return count;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
inline ScalarRegI32
|
||||
firstOppositeSignBit(ScalarRegI64 val)
|
||||
{
|
||||
bool found(false);
|
||||
bool sign_bit = (val & 0x8000000000000000ULL) != 0;
|
||||
ScalarRegU64 tmp_val(0);
|
||||
int count(0);
|
||||
|
||||
if (!val || val == -1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < std::numeric_limits<ScalarRegU64>::digits; ++i) {
|
||||
tmp_val = val & (0x8000000000000000ULL >> i);
|
||||
|
||||
if (!sign_bit) {
|
||||
if (tmp_val) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (!tmp_val) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
++count;
|
||||
}
|
||||
|
||||
if (found) {
|
||||
return count;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T
|
||||
median(T val_0, T val_1, T val_2)
|
||||
{
|
||||
if (std::is_floating_point<T>::value) {
|
||||
return std::fmax(std::fmin(val_0, val_1),
|
||||
std::fmin(std::fmax(val_0, val_1), val_2));
|
||||
} else {
|
||||
return std::max(std::min(val_0, val_1),
|
||||
std::min(std::max(val_0, val_1), val_2));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T roundNearestEven(T val)
|
||||
{
|
||||
T int_part = 0;
|
||||
T nearest_round = std::floor(val + 0.5);
|
||||
if ((int)std::floor(val) % 2 == 0
|
||||
&& std::modf(std::abs(val), &int_part) == 0.5) {
|
||||
nearest_round = nearest_round - 1;
|
||||
}
|
||||
|
||||
return nearest_round;
|
||||
}
|
||||
|
||||
inline VecElemU32
|
||||
muladd(VecElemU64 &dst, VecElemU32 val_0, VecElemU32 val_1,
|
||||
VecElemU64 val_2)
|
||||
{
|
||||
__uint128_t u0 = (__uint128_t)val_0;
|
||||
__uint128_t u1 = (__uint128_t)val_1;
|
||||
__uint128_t u2 = (__uint128_t)val_2;
|
||||
__uint128_t result = u0 * u1 + u2;
|
||||
|
||||
dst = (VecElemU64)result;
|
||||
|
||||
return (VecElemU32)(result >> 64) ? 1 : 0;
|
||||
}
|
||||
|
||||
inline VecElemU32
|
||||
muladd(VecElemI64 &dst, VecElemI32 val_0, VecElemI32 val_1,
|
||||
VecElemI64 val_2)
|
||||
{
|
||||
__int128_t u0 = (__int128_t)val_0;
|
||||
__int128_t u1 = (__int128_t)val_1;
|
||||
__int128_t u2 = (__int128_t)val_2;
|
||||
__int128_t result = u0 * u1 + u2;
|
||||
|
||||
dst = (VecElemI64)result;
|
||||
|
||||
return (VecElemU32)(result >> 64) ? 1 : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* dppInstImpl is a helper function that performs the inputted operation
|
||||
* on the inputted vector register lane. The returned output lane
|
||||
* represents the input lane given the destination lane and DPP_CTRL word.
|
||||
*
|
||||
* Currently the values are:
|
||||
* 0x0 - 0xFF: full permute of four threads
|
||||
* 0x100: reserved
|
||||
* 0x101 - 0x10F: row shift right by 1-15 threads
|
||||
* 0x111 - 0x11F: row shift right by 1-15 threads
|
||||
* 0x121 - 0x12F: row shift right by 1-15 threads
|
||||
* 0x130: wavefront left shift by 1 thread
|
||||
* 0x134: wavefront left rotate by 1 thread
|
||||
* 0x138: wavefront right shift by 1 thread
|
||||
* 0x13C: wavefront right rotate by 1 thread
|
||||
* 0x140: mirror threads within row
|
||||
* 0x141: mirror threads within 1/2 row (8 threads)
|
||||
* 0x142: broadcast 15th thread of each row to next row
|
||||
* 0x143: broadcast thread 31 to rows 2 and 3
|
||||
*/
|
||||
int dppInstImpl(SqDPPVals dppCtrl, int currLane, int rowNum,
|
||||
int rowOffset, bool & outOfBounds)
|
||||
{
|
||||
// local variables
|
||||
// newLane will be the same as the input lane unless swizzling happens
|
||||
int newLane = currLane;
|
||||
// for shift/rotate permutations; positive values are LEFT rotates
|
||||
int count = 1;
|
||||
int localRowOffset = rowOffset;
|
||||
int localRowNum = rowNum;
|
||||
|
||||
if (dppCtrl <= SQ_DPP_QUAD_PERM_MAX) { // DPP_QUAD_PERM{00:FF}
|
||||
int quadBase = (currLane & ~(3));
|
||||
int quadPix = (currLane & 3);
|
||||
quadPix = ((dppCtrl >> (2 * quadPix)) & 3);
|
||||
newLane = (quadBase | quadPix);
|
||||
} else if (dppCtrl == SQ_DPP_RESERVED) {
|
||||
panic("ERROR: instruction using reserved DPP_CTRL value\n");
|
||||
} else if ((dppCtrl >= SQ_DPP_ROW_SL1) &&
|
||||
(dppCtrl <= SQ_DPP_ROW_SL15)) { // DPP_ROW_SL{1:15}
|
||||
count -= (dppCtrl - SQ_DPP_ROW_SL1 + 1);
|
||||
if ((localRowOffset + count >= 0) &&
|
||||
(localRowOffset + count < ROW_SIZE)) {
|
||||
localRowOffset += count;
|
||||
newLane = (rowNum | localRowOffset);
|
||||
} else {
|
||||
outOfBounds = true;
|
||||
}
|
||||
} else if ((dppCtrl >= SQ_DPP_ROW_SR1) &&
|
||||
(dppCtrl <= SQ_DPP_ROW_SR15)) { // DPP_ROW_SR{1:15}
|
||||
count -= (dppCtrl - SQ_DPP_ROW_SR1 + 1);
|
||||
if ((localRowOffset + count >= 0) &&
|
||||
(localRowOffset + count < ROW_SIZE)) {
|
||||
localRowOffset += count;
|
||||
newLane = (rowNum | localRowOffset);
|
||||
} else {
|
||||
outOfBounds = true;
|
||||
}
|
||||
} else if ((dppCtrl >= SQ_DPP_ROW_RR1) &&
|
||||
(dppCtrl <= SQ_DPP_ROW_RR15)) { // DPP_ROW_RR{1:15}
|
||||
count -= (dppCtrl - SQ_DPP_ROW_RR1 + 1);
|
||||
localRowOffset = (localRowOffset + count + ROW_SIZE) % ROW_SIZE;
|
||||
newLane = (rowNum | localRowOffset);
|
||||
} else if (dppCtrl == SQ_DPP_WF_SL1) { // DPP_WF_SL1
|
||||
count = 1;
|
||||
if ((currLane >= 0) && (currLane < NumVecElemPerVecReg)) {
|
||||
newLane += count;
|
||||
} else {
|
||||
outOfBounds = true;
|
||||
}
|
||||
} else if (dppCtrl == SQ_DPP_WF_RL1) { // DPP_WF_RL1
|
||||
count = 1;
|
||||
newLane = (currLane + count + NumVecElemPerVecReg) %
|
||||
NumVecElemPerVecReg;
|
||||
} else if (dppCtrl == SQ_DPP_WF_SR1) { // DPP_WF_SR1
|
||||
count = -1;
|
||||
int currVal = (currLane + count);
|
||||
if ((currVal >= 0) && (currVal < NumVecElemPerVecReg)) {
|
||||
newLane += count;
|
||||
} else {
|
||||
outOfBounds = true;
|
||||
}
|
||||
} else if (dppCtrl == SQ_DPP_WF_RR1) { // DPP_WF_RR1
|
||||
count = -1;
|
||||
newLane = (currLane + count + NumVecElemPerVecReg) %
|
||||
NumVecElemPerVecReg;
|
||||
} else if (dppCtrl == SQ_DPP_ROW_MIRROR) { // DPP_ROW_MIRROR
|
||||
localRowOffset = (15 - localRowOffset);
|
||||
newLane = (rowNum | localRowOffset);
|
||||
} else if (dppCtrl == SQ_DPP_ROW_HALF_MIRROR) { // DPP_ROW_HALF_MIRROR
|
||||
localRowNum = (currLane & -0x7);
|
||||
localRowOffset = (currLane & 0x7);
|
||||
localRowOffset = (7 - localRowNum);
|
||||
newLane = (localRowNum | localRowOffset);
|
||||
} else if (dppCtrl == SQ_DPP_ROW_BCAST15) { // DPP_ROW_BCAST15
|
||||
count = 15;
|
||||
if (currLane > count) {
|
||||
newLane = (currLane & ~count) - 1;
|
||||
}
|
||||
} else if (dppCtrl == SQ_DPP_ROW_BCAST31) { // DPP_ROW_BCAST31
|
||||
count = 31;
|
||||
if (currLane > count) {
|
||||
newLane = (currLane & ~count) - 1;
|
||||
}
|
||||
} else {
|
||||
panic("Unimplemented DPP control operation: %d\n", dppCtrl);
|
||||
}
|
||||
|
||||
return newLane;
|
||||
}
|
||||
|
||||
/**
|
||||
* processDPP is a helper function for implementing Data Parallel Primitive
|
||||
* instructions. This function may be called by many different VOP1
|
||||
* instructions to do operations within a register.
|
||||
*/
|
||||
template<typename T>
|
||||
void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst,
|
||||
T & src0)
|
||||
{
|
||||
// local variables
|
||||
SqDPPVals dppCtrl = (SqDPPVals)dppInst.DPP_CTRL;
|
||||
int boundCtrl = dppInst.BOUND_CTRL;
|
||||
int bankMask = dppInst.BANK_MASK;
|
||||
int rowMask = dppInst.ROW_MASK;
|
||||
// row, bank info to be calculated per lane
|
||||
int rowNum = 0, bankNum = 0, rowOffset = 0;
|
||||
// outLane will be the same as the input lane unless swizzling happens
|
||||
int outLane = 0;
|
||||
bool laneDisabled = false;
|
||||
// flags used for determining if a lane should be written to/reset/etc.
|
||||
bool outOfBounds = false, zeroSrc = false;
|
||||
long long threadValid = 0;
|
||||
|
||||
/**
|
||||
* STEP 1a: check if the absolute value (ABS) or negation (NEG) tags
|
||||
* are set. If so, do the appropriate action(s) on src0 and/or src1.
|
||||
*
|
||||
* NOTE: ABS takes priority over NEG.
|
||||
*/
|
||||
if (dppInst.SRC0_NEG) {
|
||||
src0.negModifier();
|
||||
}
|
||||
|
||||
if (dppInst.SRC0_ABS) {
|
||||
src0.absModifier();
|
||||
}
|
||||
|
||||
// iterate over all register lanes, performing steps 2-4
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
threadValid = (0x1LL << lane);
|
||||
/**
|
||||
* STEP 2: check the row and bank mask values. These determine
|
||||
* which threads are enabled for the subsequent DPP_CTRL
|
||||
* operations.
|
||||
*/
|
||||
rowNum = (lane / ROW_SIZE);
|
||||
rowOffset = (lane % ROW_SIZE);
|
||||
bankNum = (rowOffset / NUM_BANKS);
|
||||
|
||||
if (((rowMask & (0x1 << rowNum)) == 0) /* row mask */ ||
|
||||
((bankMask & (0x1 << bankNum)) == 0) /* bank mask */) {
|
||||
laneDisabled = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
/**
|
||||
* STEP 4: Handle the potential values of DPP_CTRL:
|
||||
* 0x0 - 0xFF: full permute of four threads
|
||||
* 0x100: reserved
|
||||
* 0x101 - 0x10F: row shift right by 1-15 threads
|
||||
* 0x111 - 0x11F: row shift right by 1-15 threads
|
||||
* 0x121 - 0x12F: row shift right by 1-15 threads
|
||||
* 0x130: wavefront left shift by 1 thread
|
||||
* 0x134: wavefront left rotate by 1 thread
|
||||
* 0x138: wavefront right shift by 1 thread
|
||||
* 0x13C: wavefront right rotate by 1 thread
|
||||
* 0x140: mirror threads within row
|
||||
* 0x141: mirror threads within 1/2 row (8 threads)
|
||||
* 0x142: broadcast 15th thread of each row to next row
|
||||
* 0x143: broadcast thread 31 to rows 2 and 3
|
||||
*/
|
||||
if (!laneDisabled) {
|
||||
outLane = dppInstImpl(dppCtrl, lane, rowNum, rowOffset,
|
||||
outOfBounds);
|
||||
}
|
||||
|
||||
/**
|
||||
* STEP 4: Implement bound control for disabled threads. If thread
|
||||
* is disabled but boundCtrl is set, then we need to set the source
|
||||
* data to 0 (i.e., set this lane to 0).
|
||||
*/
|
||||
if (laneDisabled) {
|
||||
threadValid = 0;
|
||||
} else if (outOfBounds) {
|
||||
if (boundCtrl == 1) {
|
||||
zeroSrc = true;
|
||||
} else {
|
||||
threadValid = 0;
|
||||
}
|
||||
} else if (!gpuDynInst->exec_mask[lane]) {
|
||||
if (boundCtrl == 1) {
|
||||
zeroSrc = true;
|
||||
} else {
|
||||
threadValid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (threadValid != 0 && !outOfBounds && !zeroSrc) {
|
||||
assert(!laneDisabled);
|
||||
src0[outLane] = src0[lane];
|
||||
} else if (zeroSrc) {
|
||||
src0[lane] = 0;
|
||||
}
|
||||
|
||||
// reset for next iteration
|
||||
laneDisabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* processDPP is a helper function for implementing Data Parallel Primitive
|
||||
* instructions. This function may be called by many different
|
||||
* VOP2/VOPC instructions to do operations within a register.
|
||||
*/
|
||||
template<typename T>
|
||||
void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst,
|
||||
T & src0, T & src1)
|
||||
{
|
||||
/**
|
||||
* STEP 1b: check if the absolute value (ABS) or negation (NEG) tags
|
||||
* are set. If so, do the appropriate action(s) on src0 and/or src1.
|
||||
*
|
||||
* NOTE: ABS takes priority over NEG.
|
||||
*/
|
||||
if (dppInst.SRC1_NEG) {
|
||||
src1.negModifier();
|
||||
}
|
||||
|
||||
if (dppInst.SRC1_ABS) {
|
||||
src1.absModifier();
|
||||
}
|
||||
|
||||
// Since only difference for VOP1 and VOP2/VOPC instructions is SRC1,
|
||||
// which is only used for negation/absolute value, call other version
|
||||
// to do everything else.
|
||||
processDPP(gpuDynInst, dppInst, src0);
|
||||
}
|
||||
|
||||
/**
|
||||
* sdwaInstSrcImpl_helper contains the per-lane code for selecting the
|
||||
* appropriate bytes/words of the lane and doing the appropriate
|
||||
* masking/padding/sign extending. It returns the value after these
|
||||
* operations are done on it.
|
||||
*/
|
||||
template<typename T>
|
||||
T sdwaInstSrcImpl_helper(T currOperVal, const T origOperVal,
|
||||
const SDWASelVals sel, const bool signExt)
|
||||
{
|
||||
// local variables
|
||||
int low_bit = 0, high_bit = 0;
|
||||
bool signExt_local = signExt;
|
||||
T retVal = 0;
|
||||
|
||||
// if we're preserving all of the bits, then we can immediately return
|
||||
if (sel == SDWA_DWORD) {
|
||||
return currOperVal;
|
||||
}
|
||||
|
||||
if (sel < SDWA_WORD_0) { // we are selecting 1 byte
|
||||
/*
|
||||
Process byte 0 first. This code eiter selects the original bits
|
||||
of byte 0, or makes the bits of the selected byte be byte 0 (and
|
||||
next either sign extends or zero's out upper bits).
|
||||
*/
|
||||
low_bit = (sel * VegaISA::BITS_PER_BYTE);
|
||||
high_bit = low_bit + VegaISA::MSB_PER_BYTE;
|
||||
retVal = bits(currOperVal, high_bit, low_bit);
|
||||
|
||||
// make sure update propagated, since used next
|
||||
panic_if(bits(retVal, VegaISA::MSB_PER_BYTE) !=
|
||||
bits(origOperVal, high_bit),
|
||||
"ERROR: SDWA byte update not propagated: retVal: %d, "
|
||||
"orig: %d\n", bits(retVal, VegaISA::MSB_PER_BYTE),
|
||||
bits(origOperVal, high_bit));
|
||||
// sign extended value depends on upper-most bit of the new byte 0
|
||||
signExt_local = (signExt &&
|
||||
(bits(retVal, VegaISA::MSB_PER_BYTE, 0) & 0x80));
|
||||
|
||||
// process all other bytes -- if sign extending, make them 1, else
|
||||
// all 0's so leave as is
|
||||
if (signExt_local) {
|
||||
retVal = (uint32_t)sext<VegaISA::MSB_PER_BYTE>(retVal);
|
||||
}
|
||||
} else if (sel < SDWA_DWORD) { // we are selecting 1 word
|
||||
/*
|
||||
Process word 0 first. This code eiter selects the original bits
|
||||
of word 0, or makes the bits of the selected word be word 0 (and
|
||||
next either sign extends or zero's out upper bits).
|
||||
*/
|
||||
low_bit = (sel & 1) * VegaISA::BITS_PER_WORD;
|
||||
high_bit = low_bit + VegaISA::MSB_PER_WORD;
|
||||
retVal = bits(currOperVal, high_bit, low_bit);
|
||||
|
||||
// make sure update propagated, since used next
|
||||
panic_if(bits(retVal, VegaISA::MSB_PER_WORD) !=
|
||||
bits(origOperVal, high_bit),
|
||||
"ERROR: SDWA word update not propagated: retVal: %d, "
|
||||
"orig: %d\n",
|
||||
bits(retVal, VegaISA::MSB_PER_WORD),
|
||||
bits(origOperVal, high_bit));
|
||||
// sign extended value depends on upper-most bit of the new word 0
|
||||
signExt_local = (signExt &&
|
||||
(bits(retVal, VegaISA::MSB_PER_WORD, 0) &
|
||||
0x8000));
|
||||
|
||||
// process other word -- if sign extending, make them 1, else all
|
||||
// 0's so leave as is
|
||||
if (signExt_local) {
|
||||
retVal = (uint32_t)sext<VegaISA::MSB_PER_WORD>(retVal);
|
||||
}
|
||||
} else {
|
||||
assert(sel != SDWA_DWORD); // should have returned earlier
|
||||
panic("Unimplemented SDWA select operation: %d\n", sel);
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* sdwaInstSrcImpl is a helper function that selects the appropriate
|
||||
* bits/bytes for each lane of the inputted source operand of an SDWA
|
||||
* instruction, does the appropriate masking/padding/sign extending for the
|
||||
* non-selected bits/bytes, and updates the operands values with the
|
||||
* resultant value.
|
||||
*
|
||||
* The desired behavior is:
|
||||
* 1. Select the appropriate bits/bytes based on sel:
|
||||
* 0 (SDWA_BYTE_0): select data[7:0]
|
||||
* 1 (SDWA_BYTE_1): select data[15:8]
|
||||
* 2 (SDWA_BYTE_2): select data[23:16]
|
||||
* 3 (SDWA_BYTE_3): select data[31:24]
|
||||
* 4 (SDWA_WORD_0): select data[15:0]
|
||||
* 5 (SDWA_WORD_1): select data[31:16]
|
||||
* 6 (SDWA_DWORD): select data[31:0]
|
||||
* 2. if sign extend is set, then sign extend the value
|
||||
*/
|
||||
template<typename T>
|
||||
void sdwaInstSrcImpl(T & currOper, T & origCurrOper,
|
||||
const SDWASelVals sel, const bool signExt)
|
||||
{
|
||||
// iterate over all lanes, setting appropriate, selected value
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
currOper[lane] = sdwaInstSrcImpl_helper(currOper[lane],
|
||||
origCurrOper[lane], sel,
|
||||
signExt);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* sdwaInstDstImpl_helper contains the per-lane code for selecting the
|
||||
* appropriate bytes/words of the lane and doing the appropriate
|
||||
* masking/padding/sign extending. It returns the value after these
|
||||
* operations are done on it.
|
||||
*/
|
||||
template<typename T>
|
||||
T sdwaInstDstImpl_helper(T currDstVal, const T origDstVal,
|
||||
const bool clamp, const SDWASelVals sel,
|
||||
const SDWADstVals unusedBits_format)
|
||||
{
|
||||
// local variables
|
||||
int low_bit = 0, high_bit = 0;
|
||||
bool signExt = (unusedBits_format == SDWA_UNUSED_SEXT);
|
||||
//bool pad = (unusedBits_format == SDWA_UNUSED_PAD);
|
||||
bool preserve = (unusedBits_format == SDWA_UNUSED_PRESERVE);
|
||||
T retVal = 0, origBits_thisByte = 0, currBits_thisByte = 0,
|
||||
origBits_thisWord = 0, currBits_thisWord = 0, newBits = 0;
|
||||
|
||||
// if we're preserving all of the bits, then we can immediately return
|
||||
if (unusedBits_format == SDWA_UNUSED_PRESERVE) {
|
||||
assert(sel == SDWA_DWORD);
|
||||
return currDstVal;
|
||||
} else if (sel == SDWA_DWORD) {
|
||||
// NOTE: users may set the unused bits variable to anything in this
|
||||
// scenario, because it will be ignored
|
||||
return currDstVal;
|
||||
}
|
||||
|
||||
if (sel < SDWA_WORD_0) { // we are selecting 1 byte
|
||||
// if we sign extended depends on upper-most bit of byte 0
|
||||
signExt = (signExt &&
|
||||
(bits(currDstVal, VegaISA::MSB_PER_WORD, 0) & 0x80));
|
||||
|
||||
for (int byte = 0; byte < 4; ++byte) {
|
||||
low_bit = byte * VegaISA::BITS_PER_BYTE;
|
||||
high_bit = low_bit + VegaISA::MSB_PER_BYTE;
|
||||
/*
|
||||
Options:
|
||||
1. byte == sel: we are keeping all bits in this byte
|
||||
2. preserve is set: keep this byte as is because the
|
||||
output preserve flag is set
|
||||
3. byte > sel && signExt: we're sign extending and
|
||||
this byte is one of the bytes we need to sign extend
|
||||
*/
|
||||
origBits_thisByte = bits(origDstVal, high_bit, low_bit);
|
||||
currBits_thisByte = bits(currDstVal, high_bit, low_bit);
|
||||
newBits = ((byte == sel) ? origBits_thisByte :
|
||||
((preserve) ? currBits_thisByte :
|
||||
(((byte > sel) && signExt) ? 0xff : 0)));
|
||||
retVal = insertBits(retVal, high_bit, low_bit, newBits);
|
||||
}
|
||||
} else if (sel < SDWA_DWORD) { // we are selecting 1 word
|
||||
low_bit = 0;
|
||||
high_bit = low_bit + VegaISA::MSB_PER_WORD;
|
||||
// if we sign extended depends on upper-most bit of word 0
|
||||
signExt = (signExt &&
|
||||
(bits(currDstVal, high_bit, low_bit) & 0x8000));
|
||||
|
||||
for (int word = 0; word < 2; ++word) {
|
||||
low_bit = word * VegaISA::BITS_PER_WORD;
|
||||
high_bit = low_bit + VegaISA::MSB_PER_WORD;
|
||||
/*
|
||||
Options:
|
||||
1. word == sel & 1: we are keeping all bits in this word
|
||||
2. preserve is set: keep this word as is because the
|
||||
output preserve flag is set
|
||||
3. word > (sel & 1) && signExt: we're sign extending and
|
||||
this word is one of the words we need to sign extend
|
||||
*/
|
||||
origBits_thisWord = bits(origDstVal, high_bit, low_bit);
|
||||
currBits_thisWord = bits(currDstVal, high_bit, low_bit);
|
||||
newBits = ((word == (sel & 0x1)) ? origBits_thisWord :
|
||||
((preserve) ? currBits_thisWord :
|
||||
(((word > (sel & 0x1)) && signExt) ? 0xffff : 0)));
|
||||
retVal = insertBits(retVal, high_bit, low_bit, newBits);
|
||||
}
|
||||
} else {
|
||||
assert(sel != SDWA_DWORD); // should have returned earlier
|
||||
panic("Unimplemented SDWA select operation: %d\n", sel);
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* sdwaInstDestImpl is a helper function that selects the appropriate
|
||||
* bits/bytes for the inputted dest operand of an SDWA instruction, does
|
||||
* the appropriate masking/padding/sign extending for the non-selected
|
||||
* bits/bytes, and updates the operands values with the resultant value.
|
||||
*
|
||||
* The desired behavior is:
|
||||
* 1. Select the appropriate bits/bytes based on sel:
|
||||
* 0 (SDWA_BYTE_0): select data[7:0]
|
||||
* 1 (SDWA_BYTE_1): select data[15:8]
|
||||
* 2 (SDWA_BYTE_2): select data[23:16]
|
||||
* 3 (SDWA_BYTE_3): select data[31:24]
|
||||
* 4 (SDWA_WORD_0): select data[15:0]
|
||||
* 5 (SDWA_WORD_1): select data[31:16]
|
||||
* 6 (SDWA_DWORD): select data[31:0]
|
||||
* 2. either pad, sign extend, or select all bits based on the value of
|
||||
* unusedBits_format:
|
||||
* 0 (SDWA_UNUSED_PAD): pad all unused bits with 0
|
||||
* 1 (SDWA_UNUSED_SEXT): sign-extend upper bits; pad lower bits w/ 0
|
||||
* 2 (SDWA_UNUSED_PRESERVE): select data[31:0]
|
||||
*/
|
||||
template<typename T>
|
||||
void sdwaInstDstImpl(T & dstOper, T & origDstOper, const bool clamp,
|
||||
const SDWASelVals sel,
|
||||
const SDWADstVals unusedBits_format)
|
||||
{
|
||||
// iterate over all lanes, setting appropriate, selected value
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
dstOper[lane] = sdwaInstDstImpl_helper(dstOper[lane],
|
||||
origDstOper[lane], clamp,
|
||||
sel, unusedBits_format);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* processSDWA_srcHelper is a helper function for implementing sub d-word
|
||||
* addressing instructions for the src operands. This function may be
|
||||
* called by many different VOP1/VOP2/VOPC instructions to do operations
|
||||
* within a register. This function is also agnostic of which operand it
|
||||
* is operating on, so that it can be called for any src operand.
|
||||
*/
|
||||
template<typename T>
|
||||
void processSDWA_src_helper(T & currSrc, T & origCurrSrc,
|
||||
const SDWASelVals src_sel,
|
||||
const bool src_signExt, const bool src_abs,
|
||||
const bool src_neg)
|
||||
{
|
||||
/**
|
||||
* STEP 1: check if the absolute value (ABS) or negation (NEG) tags
|
||||
* are set. If so, do the appropriate action(s) on the src operand.
|
||||
*
|
||||
* NOTE: According to the CSim implementation, ABS takes priority over
|
||||
* NEG.
|
||||
*/
|
||||
if (src_neg) {
|
||||
currSrc.negModifier();
|
||||
}
|
||||
|
||||
if (src_abs) {
|
||||
currSrc.absModifier();
|
||||
}
|
||||
|
||||
/**
|
||||
* STEP 2: select the appropriate bits for each lane of source operand.
|
||||
*/
|
||||
sdwaInstSrcImpl(currSrc, origCurrSrc, src_sel, src_signExt);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* processSDWA_src is a helper function for implementing sub d-word
|
||||
* addressing instructions for the src operands. This function may be
|
||||
* called by many different VOP1 instructions to do operations within a
|
||||
* register. processSDWA_dst is called after the math, while
|
||||
* processSDWA_src is called before the math.
|
||||
*/
|
||||
template<typename T>
|
||||
void processSDWA_src(InFmt_VOP_SDWA sdwaInst, T & src0, T & origSrc0)
|
||||
{
|
||||
// local variables
|
||||
const SDWASelVals src0_sel = (SDWASelVals)sdwaInst.SRC0_SEL;
|
||||
const bool src0_signExt = sdwaInst.SRC0_SEXT;
|
||||
const bool src0_neg = sdwaInst.SRC0_NEG;
|
||||
const bool src0_abs = sdwaInst.SRC0_ABS;
|
||||
|
||||
// NOTE: difference between VOP1 and VOP2/VOPC is that there is no src1
|
||||
// operand. So ensure that SRC1 fields are not set, then call helper
|
||||
// function only on src0.
|
||||
assert(!sdwaInst.SRC1_SEXT);
|
||||
assert(!sdwaInst.SRC1_NEG);
|
||||
assert(!sdwaInst.SRC1_ABS);
|
||||
|
||||
processSDWA_src_helper(src0, origSrc0, src0_sel, src0_signExt,
|
||||
src0_abs, src0_neg);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* processSDWA_src is a helper function for implementing sub d-word
|
||||
* addressing instructions. This function may be called by many different
|
||||
* VOP2/VOPC instructions to do operations within a register.
|
||||
* processSDWA_dst is called after the math, while processSDWA_src is
|
||||
* called before the math.
|
||||
*/
|
||||
template<typename T>
|
||||
void processSDWA_src(InFmt_VOP_SDWA sdwaInst, T & src0, T & origSrc0,
|
||||
T & src1, T & origSrc1)
|
||||
{
|
||||
// local variables
|
||||
const SDWASelVals src0_sel = (SDWASelVals)sdwaInst.SRC0_SEL;
|
||||
const bool src0_signExt = sdwaInst.SRC0_SEXT;
|
||||
const bool src0_neg = sdwaInst.SRC0_NEG;
|
||||
const bool src0_abs = sdwaInst.SRC0_ABS;
|
||||
const SDWASelVals src1_sel = (SDWASelVals)sdwaInst.SRC1_SEL;
|
||||
const bool src1_signExt = sdwaInst.SRC1_SEXT;
|
||||
const bool src1_neg = sdwaInst.SRC1_NEG;
|
||||
const bool src1_abs = sdwaInst.SRC1_ABS;
|
||||
|
||||
processSDWA_src_helper(src0, origSrc0, src0_sel, src0_signExt,
|
||||
src0_abs, src0_neg);
|
||||
processSDWA_src_helper(src1, origSrc1, src1_sel, src1_signExt,
|
||||
src1_abs, src1_neg);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* processSDWA_dst is a helper function for implementing sub d-word
|
||||
* addressing instructions for the dst operand. This function may be
|
||||
* called by many different VOP1/VOP2/VOPC instructions to do operations
|
||||
* within a register. processSDWA_dst is called after the math, while
|
||||
* processSDWA_src is called before the math.
|
||||
*/
|
||||
template<typename T>
|
||||
void processSDWA_dst(InFmt_VOP_SDWA sdwaInst, T & dst, T & origDst)
|
||||
{
|
||||
// local variables
|
||||
const SDWADstVals dst_unusedBits_format =
|
||||
(SDWADstVals)sdwaInst.DST_UNUSED;
|
||||
const SDWASelVals dst_sel = (SDWASelVals)sdwaInst.DST_SEL;
|
||||
const bool clamp = sdwaInst.CLAMP;
|
||||
|
||||
/**
|
||||
* STEP 1: select the appropriate bits for dst and pad/sign-extend as
|
||||
* appropriate.
|
||||
*/
|
||||
sdwaInstDstImpl(dst, origDst, clamp, dst_sel, dst_unusedBits_format);
|
||||
}
|
||||
} // namespace VegaISA
|
||||
|
||||
#endif // __ARCH_VEGA_INSTS_INST_UTIL_HH__
|
||||
44294
src/arch/amdgpu/vega/insts/instructions.cc
Normal file
44294
src/arch/amdgpu/vega/insts/instructions.cc
Normal file
File diff suppressed because it is too large
Load Diff
81649
src/arch/amdgpu/vega/insts/instructions.hh
Normal file
81649
src/arch/amdgpu/vega/insts/instructions.hh
Normal file
File diff suppressed because it is too large
Load Diff
2170
src/arch/amdgpu/vega/insts/op_encodings.cc
Normal file
2170
src/arch/amdgpu/vega/insts/op_encodings.cc
Normal file
File diff suppressed because it is too large
Load Diff
834
src/arch/amdgpu/vega/insts/op_encodings.hh
Normal file
834
src/arch/amdgpu/vega/insts/op_encodings.hh
Normal file
@@ -0,0 +1,834 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
|
||||
#define __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
|
||||
|
||||
#include "arch/amdgpu/vega/gpu_decoder.hh"
|
||||
#include "arch/amdgpu/vega/gpu_mem_helpers.hh"
|
||||
#include "arch/amdgpu/vega/insts/gpu_static_inst.hh"
|
||||
#include "arch/amdgpu/vega/operand.hh"
|
||||
#include "debug/GPUExec.hh"
|
||||
#include "debug/VEGA.hh"
|
||||
#include "mem/ruby/system/RubySystem.hh"
|
||||
|
||||
namespace VegaISA
|
||||
{
|
||||
    /**
     * Buffer resource descriptor (V#). The bitfield layout mirrors the
     * 128-bit in-register format so instructions can memcpy it straight
     * out of a scalar operand (see the calcAddr implementations below).
     * Field order and widths must not change.
     */
    struct BufferRsrcDescriptor
    {
        uint64_t baseAddr : 48;     // buffer base address
        uint32_t stride : 14;       // bytes between array elements
        uint32_t cacheSwizzle : 1;
        uint32_t swizzleEn : 1;     // enables swizzled addressing mode
        uint32_t numRecords : 32;   // used for out-of-bounds clamping
        uint32_t dstSelX : 3;
        uint32_t dstSelY : 3;
        uint32_t dstSelZ : 3;
        uint32_t dstSelW : 3;
        uint32_t numFmt : 3;
        uint32_t dataFmt : 4;
        uint32_t elemSize : 2;      // log2-style element size (see calcAddr)
        uint32_t idxStride : 2;     // log2-style index stride (see calcAddr)
        uint32_t addTidEn : 1;      // add thread id to the buffer index
        uint32_t atc : 1;
        uint32_t hashEn : 1;
        uint32_t heap : 1;
        uint32_t mType : 3;
        uint32_t type : 2;
    };
|
||||
|
||||
// --- purely virtual instruction classes ---
|
||||
|
||||
    // Base class for all SOP2 (scalar, two-source) format instructions.
    class Inst_SOP2 : public VEGAGPUStaticInst
    {
      public:
        Inst_SOP2(InFmt_SOP2*, const std::string &opcode);

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        // first instruction DWORD
        InFmt_SOP2 instData;
        // possible second DWORD (e.g. a literal constant)
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOP2 *);
    }; // Inst_SOP2
|
||||
|
||||
    // Base class for all SOPK (scalar with inline 16-bit constant) format
    // instructions.
    class Inst_SOPK : public VEGAGPUStaticInst
    {
      public:
        Inst_SOPK(InFmt_SOPK*, const std::string &opcode);
        ~Inst_SOPK();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        // first instruction DWORD
        InFmt_SOPK instData;
        // possible second DWORD (e.g. a literal constant)
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOPK *);
    }; // Inst_SOPK
|
||||
|
||||
    // Base class for all SOP1 (scalar, one-source) format instructions.
    class Inst_SOP1 : public VEGAGPUStaticInst
    {
      public:
        Inst_SOP1(InFmt_SOP1*, const std::string &opcode);
        ~Inst_SOP1();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        // first instruction DWORD
        InFmt_SOP1 instData;
        // possible second DWORD (e.g. a literal constant)
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOP1 *);
    }; // Inst_SOP1
|
||||
|
||||
    // Base class for all SOPC (scalar compare) format instructions.
    class Inst_SOPC : public VEGAGPUStaticInst
    {
      public:
        Inst_SOPC(InFmt_SOPC*, const std::string &opcode);
        ~Inst_SOPC();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        // first instruction DWORD
        InFmt_SOPC instData;
        // possible second DWORD (e.g. a literal constant)
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_SOPC *);
    }; // Inst_SOPC
|
||||
|
||||
    // Base class for all SOPP (scalar program-control, e.g. branches and
    // waitcnts) format instructions. Always a single DWORD.
    class Inst_SOPP : public VEGAGPUStaticInst
    {
      public:
        Inst_SOPP(InFmt_SOPP*, const std::string &opcode);
        ~Inst_SOPP();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        // first instruction DWORD
        InFmt_SOPP instData;
    }; // Inst_SOPP
|
||||
|
||||
    // Base class for all SMEM (scalar memory) format instructions.
    // Provides helpers to initiate scalar reads/writes and to compute the
    // scalar effective address for both plain and buffer variants.
    class Inst_SMEM : public VEGAGPUStaticInst
    {
      public:
        Inst_SMEM(InFmt_SMEM*, const std::string &opcode);
        ~Inst_SMEM();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        /**
         * initiate a memory read access for N dwords
         */
        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                    MemCmd::ReadReq);
        }

        /**
         * initiate a memory write access for N dwords
         */
        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                    MemCmd::WriteReq);
        }

        /**
         * For normal s_load_dword/s_store_dword instruction addresses.
         * Result is dword-aligned (low two bits cleared).
         */
        void
        calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr,
                 ScalarRegU32 offset)
        {
            Addr vaddr = ((addr.rawData() + offset) & ~0x3);
            gpu_dyn_inst->scalarAddr = vaddr;
        }

        /**
         * For s_buffer_load_dword/s_buffer_store_dword instruction
         * addresses. The s_buffer instructions use the same buffer
         * resource descriptor as the MUBUF instructions.
         */
        void
        calcAddr(GPUDynInstPtr gpu_dyn_inst,
                 ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
        {
            BufferRsrcDescriptor rsrc_desc;
            ScalarRegU32 clamped_offset(offset);
            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                        sizeof(BufferRsrcDescriptor));

            /**
             * The address is clamped if:
             *     Stride is zero: clamp if offset >= num_records
             *     Stride is non-zero: clamp if offset > (stride *
             *     num_records)
             */
            if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
                clamped_offset = rsrc_desc.numRecords;
            } else if (rsrc_desc.stride && offset
                       > (rsrc_desc.stride * rsrc_desc.numRecords)) {
                clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
            }

            // dword-align the final scalar address
            Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
            gpu_dyn_inst->scalarAddr = vaddr;
        }

        // first instruction DWORD
        InFmt_SMEM instData;
        // second instruction DWORD
        InFmt_SMEM_1 extData;
    }; // Inst_SMEM
|
||||
|
||||
    // Base class for all VOP2 (vector, two-source) format instructions.
    class Inst_VOP2 : public VEGAGPUStaticInst
    {
      public:
        Inst_VOP2(InFmt_VOP2*, const std::string &opcode);
        ~Inst_VOP2();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        // first instruction DWORD
        InFmt_VOP2 instData;
        // possible second DWORD (e.g. a literal or SDWA/DPP word)
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOP2 *);
    }; // Inst_VOP2
|
||||
|
||||
    // Base class for all VOP1 (vector, one-source) format instructions.
    class Inst_VOP1 : public VEGAGPUStaticInst
    {
      public:
        Inst_VOP1(InFmt_VOP1*, const std::string &opcode);
        ~Inst_VOP1();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        // first instruction DWORD
        InFmt_VOP1 instData;
        // possible second DWORD (e.g. a literal or SDWA/DPP word)
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOP1 *);
    }; // Inst_VOP1
|
||||
|
||||
    // Base class for all VOPC (vector compare) format instructions.
    class Inst_VOPC : public VEGAGPUStaticInst
    {
      public:
        Inst_VOPC(InFmt_VOPC*, const std::string &opcode);
        ~Inst_VOPC();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        // first instruction DWORD
        InFmt_VOPC instData;
        // possible second DWORD (e.g. a literal or SDWA/DPP word)
        InstFormat extData;
        uint32_t varSize;

      private:
        bool hasSecondDword(InFmt_VOPC *);
    }; // Inst_VOPC
|
||||
|
||||
    // Base class for all VINTRP (vector interpolation) format
    // instructions. Always a single DWORD.
    class Inst_VINTRP : public VEGAGPUStaticInst
    {
      public:
        Inst_VINTRP(InFmt_VINTRP*, const std::string &opcode);
        ~Inst_VINTRP();

        int instSize() const override;

      protected:
        // first instruction DWORD
        InFmt_VINTRP instData;
    }; // Inst_VINTRP
|
||||
|
||||
    // Base class for all VOP3 (64-bit encoded vector) format
    // instructions.
    class Inst_VOP3 : public VEGAGPUStaticInst
    {
      public:
        Inst_VOP3(InFmt_VOP3*, const std::string &opcode, bool sgpr_dst);
        ~Inst_VOP3();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        // first instruction DWORD
        InFmt_VOP3 instData;
        // second instruction DWORD
        InFmt_VOP3_1 extData;

      private:
        bool hasSecondDword(InFmt_VOP3 *);
        /**
         * the v_cmp and readlane instructions in the VOP3
         * encoding are unique because they are the only
         * instructions that use the VDST field to specify
         * a scalar register destination. for VOP3::V_CMP insts
         * VDST specifies the arbitrary SGPR pair used to write
         * VCC. for V_READLANE VDST specifies the SGPR to return
         * the value of the selected lane in the source VGPR
         * from which we are reading.
         */
        const bool sgprDst;
    }; // Inst_VOP3
|
||||
|
||||
    // Base class for VOP3 instructions that encode an explicit scalar
    // destination (SDST) field (e.g. carry-out producing adds).
    class Inst_VOP3_SDST_ENC : public VEGAGPUStaticInst
    {
      public:
        Inst_VOP3_SDST_ENC(InFmt_VOP3_SDST_ENC*, const std::string &opcode);
        ~Inst_VOP3_SDST_ENC();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        // first instruction DWORD
        InFmt_VOP3_SDST_ENC instData;
        // second instruction DWORD
        InFmt_VOP3_1 extData;

      private:
        bool hasSecondDword(InFmt_VOP3_SDST_ENC *);
    }; // Inst_VOP3_SDST_ENC
|
||||
|
||||
    // Base class for all DS (LDS/local data share) format instructions.
    // Provides helpers that read/write the wavefront's LDS chunk directly
    // (LDS accesses do not go through the memory system request path).
    class Inst_DS : public VEGAGPUStaticInst
    {
      public:
        Inst_DS(InFmt_DS*, const std::string &opcode);
        ~Inst_DS();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        // Read one value of type T per active lane from LDS at
        // addr[lane] + offset into d_data.
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                        = wf->ldsChunk->read<T>(vaddr);
                }
            }
        }

        // Read two values of type T per active lane (for ds_read2-style
        // instructions) at the two independent offsets; results are
        // interleaved in d_data as [lane*2] and [lane*2 + 1].
        template<typename T>
        void
        initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
                        = wf->ldsChunk->read<T>(vaddr0);
                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
                        = wf->ldsChunk->read<T>(vaddr1);
                }
            }
        }

        // Write one value of type T per active lane from d_data to LDS at
        // addr[lane] + offset.
        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr = gpuDynInst->addr[lane] + offset;
                    wf->ldsChunk->write<T>(vaddr,
                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
                }
            }
        }

        // Write two values of type T per active lane (for ds_write2-style
        // instructions) at the two independent offsets; source values are
        // taken interleaved from d_data.
        template<typename T>
        void
        initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
                    wf->ldsChunk->write<T>(vaddr0, (reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane * 2]);
                    wf->ldsChunk->write<T>(vaddr1, (reinterpret_cast<T*>(
                        gpuDynInst->d_data))[lane * 2 + 1]);
                }
            }
        }

        // Copy the per-lane 32-bit LDS addresses out of the vector
        // operand for each active lane.
        void
        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
        {
            Wavefront *wf = gpuDynInst->wavefront();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    gpuDynInst->addr.at(lane) = (Addr)addr[lane];
                }
            }
        }

        // first instruction DWORD
        InFmt_DS instData;
        // second instruction DWORD
        InFmt_DS_1 extData;
    }; // Inst_DS
|
||||
|
||||
    // Base class for all MUBUF (untyped buffer memory) format
    // instructions. Handles linear and swizzled buffer addressing plus
    // out-of-bounds clamping via the buffer resource descriptor.
    class Inst_MUBUF : public VEGAGPUStaticInst
    {
      public:
        Inst_MUBUF(InFmt_MUBUF*, const std::string &opcode);
        ~Inst_MUBUF();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to supress memory accesses to oob
            // regions. Only issue memory requests for lanes that have their
            // exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }


        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to supress memory accesses to oob
            // regions. Only issue memory requests for lanes that have their
            // exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to supress memory accesses to oob
            // regions. Only issue memory requests for lanes that have their
            // exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            // temporarily modify exec_mask to supress memory accesses to oob
            // regions. Only issue memory requests for lanes that have their
            // exec_mask set and are not out of bounds.
            VectorMask old_exec_mask = gpuDynInst->exec_mask;
            gpuDynInst->exec_mask &= ~oobMask;
            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
            gpuDynInst->exec_mask = old_exec_mask;
        }

        // Issue a global memory fence on behalf of this instruction
        // (used by buffer_wbinvl1-style operations).
        void
        injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
        {
            // create request and set flags
            gpuDynInst->resetEntireStatusVector();
            gpuDynInst->setStatusVector(0, 1);
            auto req = std::make_shared<Request>(0, 0, 0, 0,
                                   gpuDynInst->computeUnit()->masterId(),
                                   0, gpuDynInst->wfDynId);
            gpuDynInst->setRequestFlags(req);
            gpuDynInst->computeUnit()->
                injectGlobalMemFence(gpuDynInst, false, req);
        }

        /**
         * MUBUF insructions calculate their addresses as follows:
         *
         * index  = (IDXEN ? vgpr_idx : 0) + (const_add_tid_en ? TID : 0)
         * offset = (OFFEN ? vgpr_off : 0) + inst_off
         *
         * / ====================== LINEAR ADDRESSING ====================== /
         * VADDR = base + sgpr_off + offset + stride * index
         *
         * / ===================== SWIZZLED ADDRESSING ===================== /
         * index_msb  = index / const_index_stride
         * index_lsb  = index % const_index_stride
         * offset_msb = offset / const_element_size
         * offset_lsb = offset % const_element_size
         * buffer_offset = ((index_msb * stride + offset_msb *
         *                 const_element_size) * const_index_stride +
         *                 index_lsb * const_element_size + offset_lsb)
         *
         * VADDR = base + sgpr_off + buffer_offset
         */
        template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
        void
        calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
            SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
        {
            Addr vaddr = 0;
            Addr base_addr = 0;
            Addr stride = 0;
            Addr buf_idx = 0;
            Addr buf_off = 0;
            BufferRsrcDescriptor rsrc_desc;

            // unpack the 128-bit resource descriptor from the SGPR quad
            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                        sizeof(BufferRsrcDescriptor));

            base_addr = rsrc_desc.baseAddr;

            stride = rsrc_desc.addTidEn ? ((rsrc_desc.dataFmt << 14)
                                           + rsrc_desc.stride)
                                        : rsrc_desc.stride;

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vaddr = base_addr + s_offset.rawData();
                    /**
                     * first we calculate the buffer's index and offset.
                     * these will be used for either linear or swizzled
                     * buffers.
                     */
                    buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);

                    buf_off = v_off[lane] + inst_offset;


                    /**
                     * Range check behavior causes out of range accesses to
                     * to be treated differently. Out of range accesses return
                     * 0 for loads and are ignored for stores. For
                     * non-formatted accesses, this is done on a per-lane
                     * basis.
                     */
                    if (stride == 0 || !rsrc_desc.swizzleEn) {
                        if (buf_off + stride * buf_idx >=
                            rsrc_desc.numRecords - s_offset.rawData()) {
                            DPRINTF(VEGA, "mubuf out-of-bounds condition 1: "
                                    "lane = %d, buffer_offset = %llx, "
                                    "const_stride = %llx, "
                                    "const_num_records = %llx\n",
                                    lane, buf_off + stride * buf_idx,
                                    stride, rsrc_desc.numRecords);
                            oobMask.set(lane);
                            continue;
                        }
                    }

                    if (stride != 0 && rsrc_desc.swizzleEn) {
                        if (buf_idx >= rsrc_desc.numRecords ||
                            buf_off >= stride) {
                            DPRINTF(VEGA, "mubuf out-of-bounds condition 2: "
                                    "lane = %d, offset = %llx, "
                                    "index = %llx, "
                                    "const_num_records = %llx\n",
                                    lane, buf_off, buf_idx,
                                    rsrc_desc.numRecords);
                            oobMask.set(lane);
                            continue;
                        }
                    }

                    if (rsrc_desc.swizzleEn) {
                        // swizzled addressing: interleave index/offset
                        // MSB/LSB components per the descriptor's
                        // idxStride and elemSize fields
                        Addr idx_stride = 8 << rsrc_desc.idxStride;
                        Addr elem_size = 2 << rsrc_desc.elemSize;
                        Addr idx_msb = buf_idx / idx_stride;
                        Addr idx_lsb = buf_idx % idx_stride;
                        Addr off_msb = buf_off / elem_size;
                        Addr off_lsb = buf_off % elem_size;
                        DPRINTF(VEGA, "mubuf swizzled lane %d: "
                                "idx_stride = %llx, elem_size = %llx, "
                                "idx_msb = %llx, idx_lsb = %llx, "
                                "off_msb = %llx, off_lsb = %llx\n",
                                lane, idx_stride, elem_size, idx_msb, idx_lsb,
                                off_msb, off_lsb);

                        vaddr += ((idx_msb * stride + off_msb * elem_size)
                            * idx_stride + idx_lsb * elem_size + off_lsb);
                    } else {
                        // linear addressing
                        vaddr += buf_off + stride * buf_idx;
                    }

                    DPRINTF(VEGA, "Calculating mubuf address for lane %d: "
                            "vaddr = %llx, base_addr = %llx, "
                            "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
                            lane, vaddr, base_addr, stride,
                            buf_idx, buf_off);
                    gpuDynInst->addr.at(lane) = vaddr;
                }
            }
        }

        // first instruction DWORD
        InFmt_MUBUF instData;
        // second instruction DWORD
        InFmt_MUBUF_1 extData;
        // Mask of lanes with out-of-bounds accesses. Needs to be tracked
        // seperately from the exec_mask so that we remember to write zero
        // to the registers associated with out of bounds lanes.
        VectorMask oobMask;
    }; // Inst_MUBUF
|
||||
|
||||
    // Base class for all MTBUF (typed buffer memory) format instructions.
    class Inst_MTBUF : public VEGAGPUStaticInst
    {
      public:
        Inst_MTBUF(InFmt_MTBUF*, const std::string &opcode);
        ~Inst_MTBUF();

        int instSize() const override;

      protected:
        // first instruction DWORD
        InFmt_MTBUF instData;
        // second instruction DWORD
        InFmt_MTBUF_1 extData;

      private:
        bool hasSecondDword(InFmt_MTBUF *);
    }; // Inst_MTBUF
|
||||
|
||||
    // Base class for all MIMG (image memory) format instructions.
    class Inst_MIMG : public VEGAGPUStaticInst
    {
      public:
        Inst_MIMG(InFmt_MIMG*, const std::string &opcode);
        ~Inst_MIMG();

        int instSize() const override;

      protected:
        // first instruction DWORD
        InFmt_MIMG instData;
        // second instruction DWORD
        InFmt_MIMG_1 extData;
    }; // Inst_MIMG
|
||||
|
||||
    // Base class for all EXP (export) format instructions.
    class Inst_EXP : public VEGAGPUStaticInst
    {
      public:
        Inst_EXP(InFmt_EXP*, const std::string &opcode);
        ~Inst_EXP();

        int instSize() const override;

      protected:
        // first instruction DWORD
        InFmt_EXP instData;
        // second instruction DWORD
        InFmt_EXP_1 extData;
    }; // Inst_EXP
|
||||
|
||||
    // Base class for all FLAT (flat address space memory) format
    // instructions. Addresses are per-lane 64-bit pointers that are
    // resolved to a segment (global/LDS/scratch) after calcAddr.
    class Inst_FLAT : public VEGAGPUStaticInst
    {
      public:
        Inst_FLAT(InFmt_FLAT*, const std::string &opcode);
        ~Inst_FLAT();

        int instSize() const override;
        void generateDisassembly() override;

        bool isScalarRegister(int opIdx) override;
        bool isVectorRegister(int opIdx) override;
        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

      protected:
        // initiate a read of one value of type T per lane
        template<typename T>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
        }

        // initiate a read of N dwords per lane
        template<int N>
        void
        initMemRead(GPUDynInstPtr gpuDynInst)
        {
            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
        }

        // initiate a write of one value of type T per lane
        template<typename T>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
        }

        // initiate a write of N dwords per lane
        template<int N>
        void
        initMemWrite(GPUDynInstPtr gpuDynInst)
        {
            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
        }

        // initiate an atomic access of one value of type T per lane
        template<typename T>
        void
        initAtomicAccess(GPUDynInstPtr gpuDynInst)
        {
            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
        }

        // Copy the per-lane 64-bit flat addresses out of the vector
        // operand and resolve which memory segment each lane targets.
        void
        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr)
        {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    gpuDynInst->addr.at(lane) = addr[lane];
                }
            }
            gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
        }

        // first instruction DWORD
        InFmt_FLAT instData;
        // second instruction DWORD
        InFmt_FLAT_1 extData;
    }; // Inst_FLAT
|
||||
} // namespace VegaISA
|
||||
|
||||
#endif // __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
|
||||
101
src/arch/amdgpu/vega/isa.cc
Normal file
101
src/arch/amdgpu/vega/isa.cc
Normal file
@@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "arch/amdgpu/vega/gpu_isa.hh"
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include "gpu-compute/gpu_static_inst.hh"
|
||||
#include "gpu-compute/wavefront.hh"
|
||||
|
||||
namespace VegaISA
|
||||
{
|
||||
    // Tie this per-wavefront ISA state to its wavefront; M0 starts cleared.
    GPUISA::GPUISA(Wavefront &wf) : wavefront(wf), m0(0)
    {
    }
|
||||
|
||||
ScalarRegU32
|
||||
GPUISA::readMiscReg(int opIdx) const
|
||||
{
|
||||
switch (opIdx) {
|
||||
case REG_M0:
|
||||
return m0;
|
||||
case REG_ZERO:
|
||||
return 0;
|
||||
case REG_SCC:
|
||||
return statusReg.SCC;
|
||||
default:
|
||||
fatal("attempting to read from unsupported or non-readable "
|
||||
"register. selector val: %i\n", opIdx);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
GPUISA::writeMiscReg(int opIdx, ScalarRegU32 operandVal)
|
||||
{
|
||||
switch (opIdx) {
|
||||
case REG_M0:
|
||||
m0 = operandVal;
|
||||
break;
|
||||
case REG_SCC:
|
||||
statusReg.SCC = operandVal ? 1 : 0;
|
||||
break;
|
||||
default:
|
||||
fatal("attempting to write to an unsupported or non-writable "
|
||||
"register. selector val: %i\n", opIdx);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
GPUISA::advancePC(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
wavefront.pc(wavefront.pc()
|
||||
+ gpuDynInst->staticInstruction()->instSize());
|
||||
}
|
||||
|
||||
    // Table of the positive inline integer constants (values 1 through 64)
    // that operand selectors can name instead of a real register.
    const std::array<const ScalarRegU32, NumPosConstRegs>
        GPUISA::posConstRegs = { {
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
            20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
            37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
            54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
        } };
|
||||
|
||||
    // Table of the negative inline integer constants (values -1 through -16)
    // that operand selectors can name instead of a real register.
    const std::array<const ScalarRegI32, NumNegConstRegs>
        GPUISA::negConstRegs = { {
            -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
            -16
        } };
|
||||
} // namespace VegaISA
|
||||
740
src/arch/amdgpu/vega/operand.hh
Normal file
740
src/arch/amdgpu/vega/operand.hh
Normal file
@@ -0,0 +1,740 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_VEGA_OPERAND_HH__
|
||||
#define __ARCH_VEGA_OPERAND_HH__
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "arch/amdgpu/vega/gpu_registers.hh"
|
||||
#include "arch/generic/vec_reg.hh"
|
||||
#include "gpu-compute/scalar_register_file.hh"
|
||||
#include "gpu-compute/vector_register_file.hh"
|
||||
#include "gpu-compute/wavefront.hh"
|
||||
|
||||
/**
 * Classes that represent vector/scalar operands in the VEGA ISA. These
 * classes wrap the generic vector register type (i.e.,
 * src/arch/generic/vec_reg.hh) and allow it to be manipulated in ways
 * that are unique to VEGA instructions.
 */
|
||||
|
||||
namespace VegaISA
|
||||
{
|
||||
    /**
     * convenience traits so we can automatically infer the correct FP type
     * without looking at the number of dwords (i.e., to determine if we
     * need a float or a double when creating FP constants).
     */
    // default: one-dword types get single-precision constants
    template<typename T> struct OpTraits { typedef float FloatT; };
    // two-dword (64b) types get double-precision constants
    template<> struct OpTraits<ScalarRegF64> { typedef double FloatT; };
    template<> struct OpTraits<ScalarRegU64> { typedef double FloatT; };
|
||||
|
||||
    /**
     * Abstract base for all VEGA operands. Holds the owning dynamic
     * instruction and the operand selector; concrete subclasses define how
     * data moves between this object and the register files.
     */
    class Operand
    {
      public:
        Operand() = delete;

        Operand(GPUDynInstPtr gpuDynInst, int opIdx)
            : _gpuDynInst(gpuDynInst), _opIdx(opIdx)
        {
            assert(_gpuDynInst);
            assert(_opIdx >= 0);
        }

        /**
         * read from and write to the underlying register(s) that
         * this operand is referring to.
         */
        virtual void read() = 0;
        virtual void write() = 0;

      protected:
        /**
         * instruction object that owns this operand
         */
        GPUDynInstPtr _gpuDynInst;
        /**
         * op selector value for this operand. note that this is not
         * the same as the register file index, be it scalar or vector.
         * this could refer to inline constants, system regs, or even
         * special values.
         */
        int _opIdx;
    };
|
||||
|
||||
template<typename DataType, bool Const, size_t NumDwords>
|
||||
class ScalarOperand;
|
||||
|
||||
    /**
     * A vector operand of NumDwords dwords per lane. Maintains a local
     * vector-register copy (vecReg) that execute() methods manipulate via
     * operator[], plus pointers into the actual VRF storage (vrfData);
     * read()/write() move data between the two.
     */
    template<typename DataType, bool Const,
        size_t NumDwords = sizeof(DataType) / sizeof(VecElemU32)>
    class VecOperand final : public Operand
    {
        static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
                      "Incorrect number of DWORDS for VEGA operand.");

      public:
        VecOperand() = delete;

        VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
            : Operand(gpuDynInst, opIdx), scalar(false), absMod(false),
              negMod(false), scRegData(gpuDynInst, _opIdx),
              vrfData{{ nullptr }}
        {
            vecReg.zero();
        }

        ~VecOperand()
        {
        }

        /**
         * certain vector operands can read from the vrf/srf or constants.
         * we use this method to first determine the type of the operand,
         * then we read from the appropriate source. if vector we read
         * directly from the vrf. if scalar, we read in the data through
         * the scalar operand component. this should only be used for VSRC
         * operands.
         */
        void
        readSrc()
        {
            if (isVectorReg(_opIdx)) {
                // remap the op selector to an actual VGPR index first
                _opIdx = opSelectorToRegIdx(_opIdx, _gpuDynInst->wavefront()
                    ->reservedScalarRegs);
                read();
            } else {
                readScalar();
            }
        }

        /**
         * read from the vrf. this should only be used by vector inst
         * source operands that are explicitly vector (i.e., VSRC).
         */
        void
        read() override
        {
            assert(_gpuDynInst);
            assert(_gpuDynInst->wavefront());
            assert(_gpuDynInst->computeUnit());
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();

            // grab pointers to each physical VRF register backing this
            // operand
            for (auto i = 0; i < NumDwords; ++i) {
                int vgprIdx = cu->registerManager.mapVgpr(wf, _opIdx + i);
                vrfData[i] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);

                DPRINTF(GPUVRF, "Read v[%d]\n", vgprIdx);
                cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
            }

            if (NumDwords == 1) {
                assert(vrfData[0]);
                auto vgpr = vecReg.template as<DataType>();
                auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
                // memcpy per lane so sub-dword DataTypes are packed
                // correctly
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    std::memcpy((void*)&vgpr[lane],
                        (void*)&reg_file_vgpr[lane], sizeof(DataType));
                }
            } else if (NumDwords == 2) {
                assert(vrfData[0]);
                assert(vrfData[1]);
                auto vgpr = vecReg.template as<VecElemU64>();
                auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
                auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();

                // stitch the low and high dwords into a single 64b value
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    VecElemU64 tmp_val(0);
                    ((VecElemU32*)&tmp_val)[0] = reg_file_vgpr0[lane];
                    ((VecElemU32*)&tmp_val)[1] = reg_file_vgpr1[lane];
                    vgpr[lane] = tmp_val;
                }
            }
        }

        /**
         * write to the vrf. we maintain a copy of the underlying vector
         * reg(s) for this operand (i.e., vrfData/scRegData), as well as a
         * temporary vector register representation (i.e., vecReg) of the
         * vector register, which allows the execute() methods of
         * instructions to easily write their operand data using operator[]
         * regardless of their size. after the result is calculated we use
         * write() to write the data to the actual register file storage.
         * this allows us to do type conversion, etc., in a single call as
         * opposed to doing it in each execute() method.
         */
        void
        write() override
        {
            assert(_gpuDynInst);
            assert(_gpuDynInst->wavefront());
            assert(_gpuDynInst->computeUnit());
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();
            // loads commit under the mask captured at issue time; all other
            // writes use the wavefront's current EXEC mask
            VectorMask &exec_mask = _gpuDynInst->isLoad()
                ? _gpuDynInst->exec_mask : wf->execMask();

            if (NumDwords == 1) {
                int vgprIdx = cu->registerManager.mapVgpr(wf, _opIdx);
                vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);
                assert(vrfData[0]);
                auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
                auto vgpr = vecReg.template as<DataType>();

                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
                        std::memcpy((void*)&reg_file_vgpr[lane],
                            (void*)&vgpr[lane], sizeof(DataType));
                    }
                }

                DPRINTF(GPUVRF, "Write v[%d]\n", vgprIdx);
                cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
            } else if (NumDwords == 2) {
                int vgprIdx0 = cu->registerManager.mapVgpr(wf, _opIdx);
                int vgprIdx1 = cu->registerManager.mapVgpr(wf, _opIdx + 1);
                vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx0);
                vrfData[1] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx1);
                assert(vrfData[0]);
                assert(vrfData[1]);
                auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
                auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
                auto vgpr = vecReg.template as<VecElemU64>();

                // split each 64b lane value back into its two dword regs
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
                        reg_file_vgpr0[lane] = ((VecElemU32*)&vgpr[lane])[0];
                        reg_file_vgpr1[lane] = ((VecElemU32*)&vgpr[lane])[1];
                    }
                }

                DPRINTF(GPUVRF, "Write v[%d:%d]\n", vgprIdx0, vgprIdx1);
                cu->vrf[wf->simdId]->printReg(wf, vgprIdx0);
                cu->vrf[wf->simdId]->printReg(wf, vgprIdx1);
            }
        }

        // request that reads of this operand return the negated value
        void
        negModifier()
        {
            negMod = true;
        }

        // request that reads of this operand return the absolute value
        void
        absModifier()
        {
            absMod = true;
        }

        /**
         * getter [] operator. only enable if this operand is constant
         * (i.e, a source operand) and if it can be represented using
         * primitive types (i.e., 8b to 64b primitives).
         */
        template<bool Condition = (NumDwords == 1 || NumDwords == 2) && Const>
        typename std::enable_if<Condition, const DataType>::type
        operator[](size_t idx) const
        {
            assert(idx < NumVecElemPerVecReg);

            if (scalar) {
                // scalar source: every lane observes the same value
                DataType ret_val = scRegData.rawData();

                if (absMod) {
                    assert(std::is_floating_point<DataType>::value);
                    ret_val = std::fabs(ret_val);
                }

                if (negMod) {
                    assert(std::is_floating_point<DataType>::value);
                    ret_val = -ret_val;
                }

                return ret_val;
            } else {
                auto vgpr = vecReg.template as<DataType>();
                DataType ret_val = vgpr[idx];

                if (absMod) {
                    assert(std::is_floating_point<DataType>::value);
                    ret_val = std::fabs(ret_val);
                }

                if (negMod) {
                    assert(std::is_floating_point<DataType>::value);
                    ret_val = -ret_val;
                }

                return ret_val;
            }
        }

        /**
         * setter [] operator. only enable if this operand is non-constant
         * (i.e, a destination operand) and if it can be represented using
         * primitive types (i.e., 8b to 64b primitives).
         */
        template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
        typename std::enable_if<Condition, DataType&>::type
        operator[](size_t idx)
        {
            assert(!scalar);
            assert(idx < NumVecElemPerVecReg);

            return vecReg.template as<DataType>()[idx];
        }

      private:
        /**
         * if we determine that this operand is a scalar (reg or constant)
         * then we read the scalar data into the scalar operand data member.
         */
        void
        readScalar()
        {
            scalar = true;
            scRegData.read();
        }

        // select the narrowest container type that fits DataType: 64b
        // operands get a U64 container, sub-dword types get U16/U8, and
        // everything else uses the default U32 container
        using VecRegCont = typename std::conditional<NumDwords == 2,
            VecRegContainerU64, typename std::conditional<sizeof(DataType)
            == sizeof(VecElemU16), VecRegContainerU16,
            typename std::conditional<sizeof(DataType)
            == sizeof(VecElemU8), VecRegContainerU8,
            VecRegContainerU32>::type>::type>::type;

        /**
         * whether this operand a scalar or not.
         */
        bool scalar;
        /**
         * absolute value and negative modifiers. VOP3 instructions
         * may indicate that their input/output operands must be
         * modified, either by taking the absolute value or negating
         * them. these bools indicate which modifier, if any, to use.
         */
        bool absMod;
        bool negMod;
        /**
         * this holds all the operand data in a single vector register
         * object (i.e., if an operand is 64b, this will hold the data
         * from both registers the operand is using).
         */
        VecRegCont vecReg;
        /**
         * for src operands that read scalars (i.e., scalar regs or
         * a scalar constant).
         */
        ScalarOperand<DataType, Const, NumDwords> scRegData;
        /**
         * pointers to the underlying registers (i.e., the actual
         * registers in the register file).
         */
        std::array<VecRegContainerU32*, NumDwords> vrfData;
    };
|
||||
|
||||
    /**
     * A scalar operand of NumDwords dwords. The selector may name an SGPR,
     * a special register (EXEC, VCC, FLAT_SCRATCH), an inline constant, or
     * a literal; read()/write() dispatch accordingly.
     */
    template<typename DataType, bool Const,
        size_t NumDwords = sizeof(DataType) / sizeof(ScalarRegU32)>
    class ScalarOperand final : public Operand
    {
        static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
                      "Incorrect number of DWORDS for VEGA operand.");
      public:
        ScalarOperand() = delete;

        ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
            : Operand(gpuDynInst, opIdx)
        {
            std::memset(srfData.data(), 0, NumDwords * sizeof(ScalarRegU32));
        }

        ~ScalarOperand()
        {
        }

        /**
         * we store scalar data in a std::array, however if we need the
         * full operand data we use this method to copy all elements of
         * the scalar operand data to a single primitive container. only
         * useful for 8b to 64b primitive types, as they are the only types
         * that we need to perform computation on.
         */
        template<bool Condition = NumDwords == 1 || NumDwords == 2>
        typename std::enable_if<Condition, DataType>::type
        rawData() const
        {
            assert(sizeof(DataType) <= sizeof(srfData));
            DataType raw_data((DataType)0);
            std::memcpy((void*)&raw_data, (void*)srfData.data(),
                sizeof(DataType));

            return raw_data;
        }

        // raw pointer to the operand's dword storage, e.g., for memory
        // helpers that fill it directly
        void*
        rawDataPtr()
        {
            return (void*)srfData.data();
        }

        /**
         * Fill srfData either from the SRF (for real SGPR selectors) or
         * from the appropriate special value/constant.
         */
        void
        read() override
        {
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();

            if (!isScalarReg(_opIdx)) {
                readSpecialVal();
            } else {
                for (auto i = 0; i < NumDwords; ++i) {
                    int sgprIdx = regIdx(i);
                    srfData[i] = cu->srf[wf->simdId]->read(sgprIdx);
                    DPRINTF(GPUSRF, "Read s[%d]\n", sgprIdx);
                    cu->srf[wf->simdId]->printReg(wf, sgprIdx);
                }
            }
        }

        /**
         * Commit srfData to its destination: the EXEC mask, a misc
         * register, or the SRF. SGPR destinations of loads take their data
         * straight from the dynamic instruction's scalar_data buffer.
         */
        void
        write() override
        {
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();

            if (!isScalarReg(_opIdx)) {
                if (_opIdx == REG_EXEC_LO) {
                    ScalarRegU64 new_exec_mask_val
                        = wf->execMask().to_ullong();
                    if (NumDwords == 1) {
                        // only replace the low 32 bits of the mask
                        std::memcpy((void*)&new_exec_mask_val,
                            (void*)srfData.data(), sizeof(VecElemU32));
                    } else if (NumDwords == 2) {
                        std::memcpy((void*)&new_exec_mask_val,
                            (void*)srfData.data(), sizeof(VecElemU64));
                    } else {
                        panic("Trying to write more than 2 DWORDS to EXEC\n");
                    }
                    VectorMask new_exec_mask(new_exec_mask_val);
                    wf->execMask() = new_exec_mask;
                    DPRINTF(GPUSRF, "Write EXEC\n");
                    DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);
                } else if (_opIdx == REG_EXEC_HI) {
                    /**
                     * If we're writing only the upper half of the EXEC mask
                     * this ought to be a single dword operand.
                     */
                    assert(NumDwords == 1);
                    ScalarRegU32 new_exec_mask_hi_val(0);
                    ScalarRegU64 new_exec_mask_val
                        = wf->execMask().to_ullong();
                    std::memcpy((void*)&new_exec_mask_hi_val,
                        (void*)srfData.data(), sizeof(new_exec_mask_hi_val));
                    replaceBits(new_exec_mask_val, 63, 32,
                        new_exec_mask_hi_val);
                    VectorMask new_exec_mask(new_exec_mask_val);
                    wf->execMask() = new_exec_mask;
                    DPRINTF(GPUSRF, "Write EXEC\n");
                    DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);
                } else {
                    _gpuDynInst->writeMiscReg(_opIdx, srfData[0]);
                }
            } else {
                for (auto i = 0; i < NumDwords; ++i) {
                    int sgprIdx = regIdx(i);
                    auto &sgpr = cu->srf[wf->simdId]->readWriteable(sgprIdx);
                    if (_gpuDynInst->isLoad()) {
                        assert(sizeof(DataType) <= sizeof(ScalarRegU64));
                        sgpr = reinterpret_cast<ScalarRegU32*>(
                            _gpuDynInst->scalar_data)[i];
                    } else {
                        sgpr = srfData[i];
                    }
                    DPRINTF(GPUSRF, "Write s[%d]\n", sgprIdx);
                    cu->srf[wf->simdId]->printReg(wf, sgprIdx);
                }
            }
        }

        /**
         * bit access to scalar data. primarily used for setting vcc bits.
         */
        template<bool Condition = NumDwords == 1 || NumDwords == 2>
        typename std::enable_if<Condition, void>::type
        setBit(int bit, int bit_val)
        {
            DataType &sgpr = *((DataType*)srfData.data());
            replaceBits(sgpr, bit, bit_val);
        }

        // assign a primitive value into the operand's local storage; only
        // enabled for writable 1- or 2-dword operands
        template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
        typename std::enable_if<Condition, ScalarOperand&>::type
        operator=(DataType rhs)
        {
            std::memcpy((void*)srfData.data(), (void*)&rhs, sizeof(DataType));
            return *this;
        }

      private:
        /**
         * we have determined that we are not reading our scalar operand
         * data from the register file, so here we figure out which special
         * value we are reading (i.e., float constant, int constant, inline
         * constant, or various other system registers (e.g., exec mask).
         */
        void
        readSpecialVal()
        {
            assert(NumDwords == 1 || NumDwords == 2);

            switch(_opIdx) {
              case REG_EXEC_LO:
                {
                    ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
                        execMask().to_ullong();
                    std::memcpy((void*)srfData.data(), (void*)&exec_mask,
                        sizeof(srfData));
                    DPRINTF(GPUSRF, "Read EXEC\n");
                    DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask);
                }
                break;
              case REG_EXEC_HI:
                {
                    /**
                     * If we're reading only the upper half of the EXEC mask
                     * this ought to be a single dword operand.
                     */
                    assert(NumDwords == 1);
                    ScalarRegU64 exec_mask = _gpuDynInst->wavefront()
                        ->execMask().to_ullong();

                    ScalarRegU32 exec_mask_hi = bits(exec_mask, 63, 32);
                    std::memcpy((void*)srfData.data(), (void*)&exec_mask_hi,
                        sizeof(srfData));
                    DPRINTF(GPUSRF, "Read EXEC_HI\n");
                    DPRINTF(GPUSRF, "EXEC_HI = %#x\n", exec_mask_hi);
                }
                break;
              case REG_SRC_SWDA:
              case REG_SRC_DPP:
              case REG_SRC_LITERAL:
                // these selectors take their value from the instruction's
                // literal dword
                assert(NumDwords == 1);
                srfData[0] = _gpuDynInst->srcLiteral();
                break;
              case REG_POS_HALF:
                {
                    typename OpTraits<DataType>::FloatT pos_half = 0.5;
                    std::memcpy((void*)srfData.data(), (void*)&pos_half,
                        sizeof(srfData));

                }
                break;
              case REG_NEG_HALF:
                {
                    typename OpTraits<DataType>::FloatT neg_half = -0.5;
                    std::memcpy((void*)srfData.data(), (void*)&neg_half,
                        sizeof(srfData));
                }
                break;
              case REG_POS_ONE:
                {
                    typename OpTraits<DataType>::FloatT pos_one = 1.0;
                    std::memcpy(srfData.data(), &pos_one, sizeof(srfData));
                }
                break;
              case REG_NEG_ONE:
                {
                    typename OpTraits<DataType>::FloatT neg_one = -1.0;
                    std::memcpy(srfData.data(), &neg_one, sizeof(srfData));
                }
                break;
              case REG_POS_TWO:
                {
                    typename OpTraits<DataType>::FloatT pos_two = 2.0;
                    std::memcpy(srfData.data(), &pos_two, sizeof(srfData));
                }
                break;
              case REG_NEG_TWO:
                {
                    typename OpTraits<DataType>::FloatT neg_two = -2.0;
                    std::memcpy(srfData.data(), &neg_two, sizeof(srfData));
                }
                break;
              case REG_POS_FOUR:
                {
                    typename OpTraits<DataType>::FloatT pos_four = 4.0;
                    std::memcpy(srfData.data(), &pos_four, sizeof(srfData));
                }
                break;
              case REG_NEG_FOUR:
                {
                    typename OpTraits<DataType>::FloatT neg_four = -4.0;
                    std::memcpy((void*)srfData.data(), (void*)&neg_four ,
                        sizeof(srfData));
                }
                break;
              case REG_PI:
                {
                    assert(sizeof(DataType) == sizeof(ScalarRegF64)
                        || sizeof(DataType) == sizeof(ScalarRegF32));

                    // bit patterns for 1/(2*pi) in single and double
                    // precision
                    const ScalarRegU32 pi_u32(0x3e22f983UL);
                    const ScalarRegU64 pi_u64(0x3fc45f306dc9c882ULL);

                    if (sizeof(DataType) == sizeof(ScalarRegF64)) {
                        std::memcpy((void*)srfData.data(),
                            (void*)&pi_u64, sizeof(srfData));
                    } else {
                        std::memcpy((void*)srfData.data(),
                            (void*)&pi_u32, sizeof(srfData));
                    }
                }
                break;
              default:
                {
                    // remaining selectors are either inline integer
                    // constants or misc registers
                    assert(sizeof(DataType) <= sizeof(srfData));
                    DataType misc_val(0);
                    if (isConstVal(_opIdx)) {
                        misc_val = (DataType)_gpuDynInst
                            ->readConstVal<DataType>(_opIdx);
                    } else {
                        misc_val = (DataType)_gpuDynInst->readMiscReg(_opIdx);
                    }
                    std::memcpy((void*)srfData.data(), (void*)&misc_val,
                        sizeof(DataType));
                }
            }
        }

        /**
         * for scalars we need to do some extra work to figure out how to
         * map the op selector to the sgpr idx because some op selectors
         * do not map directly to the srf (i.e., vcc/flat_scratch).
         */
        int
        regIdx(int dword) const
        {
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();
            int sgprIdx(-1);

            // VCC and FLAT_SCRATCH live at fixed offsets from the top of
            // the wavefront's reserved scalar registers
            if (_opIdx == REG_VCC_LO) {
                sgprIdx = cu->registerManager
                    .mapSgpr(wf, wf->reservedScalarRegs - 2 + dword);
            } else if (_opIdx == REG_FLAT_SCRATCH_HI) {
                sgprIdx = cu->registerManager
                    .mapSgpr(wf, wf->reservedScalarRegs - 3 + dword);
            } else if (_opIdx == REG_FLAT_SCRATCH_LO) {
                assert(NumDwords == 1);
                sgprIdx = cu->registerManager
                    .mapSgpr(wf, wf->reservedScalarRegs - 4 + dword);
            } else {
                sgprIdx = cu->registerManager.mapSgpr(wf, _opIdx + dword);
            }

            assert(sgprIdx > -1);

            return sgprIdx;
        }

        /**
         * in VEGA each register is represented as a 32b unsigned value,
         * however operands may require up to 16 registers, so we store
         * all the individual 32b components here. for sub-dword operand
         * we still consider them to be 1 dword because the minimum size
         * of a register is 1 dword. this class will take care to do the
         * proper packing/unpacking of sub-dword operands.
         */
        std::array<ScalarRegU32, NumDwords> srfData;
    };
|
||||
|
||||
    // typedefs for the various sizes/types of scalar operands
    using ScalarOperandU8 = ScalarOperand<ScalarRegU8, false, 1>;
    using ScalarOperandI8 = ScalarOperand<ScalarRegI8, false, 1>;
    using ScalarOperandU16 = ScalarOperand<ScalarRegU16, false, 1>;
    using ScalarOperandI16 = ScalarOperand<ScalarRegI16, false, 1>;
    using ScalarOperandU32 = ScalarOperand<ScalarRegU32, false>;
    using ScalarOperandI32 = ScalarOperand<ScalarRegI32, false>;
    using ScalarOperandF32 = ScalarOperand<ScalarRegF32, false>;
    using ScalarOperandU64 = ScalarOperand<ScalarRegU64, false>;
    using ScalarOperandI64 = ScalarOperand<ScalarRegI64, false>;
    using ScalarOperandF64 = ScalarOperand<ScalarRegF64, false>;
    // wide scalar operands (e.g., resource descriptors) are expressed as
    // multiple U32 dwords
    using ScalarOperandU128 = ScalarOperand<ScalarRegU32, false, 4>;
    using ScalarOperandU256 = ScalarOperand<ScalarRegU32, false, 8>;
    using ScalarOperandU512 = ScalarOperand<ScalarRegU32, false, 16>;
    // non-writeable versions of scalar operands
    using ConstScalarOperandU8 = ScalarOperand<ScalarRegU8, true, 1>;
    using ConstScalarOperandI8 = ScalarOperand<ScalarRegI8, true, 1>;
    using ConstScalarOperandU16 = ScalarOperand<ScalarRegU16, true, 1>;
    using ConstScalarOperandI16 = ScalarOperand<ScalarRegI16, true, 1>;
    using ConstScalarOperandU32 = ScalarOperand<ScalarRegU32, true>;
    using ConstScalarOperandI32 = ScalarOperand<ScalarRegI32, true>;
    using ConstScalarOperandF32 = ScalarOperand<ScalarRegF32, true>;
    using ConstScalarOperandU64 = ScalarOperand<ScalarRegU64, true>;
    using ConstScalarOperandI64 = ScalarOperand<ScalarRegI64, true>;
    using ConstScalarOperandF64 = ScalarOperand<ScalarRegF64, true>;
    using ConstScalarOperandU128 = ScalarOperand<ScalarRegU32, true, 4>;
    using ConstScalarOperandU256 = ScalarOperand<ScalarRegU32, true, 8>;
    using ConstScalarOperandU512 = ScalarOperand<ScalarRegU32, true, 16>;
    // typedefs for the various sizes/types of vector operands
    using VecOperandU8 = VecOperand<VecElemU8, false, 1>;
    using VecOperandI8 = VecOperand<VecElemI8, false, 1>;
    using VecOperandU16 = VecOperand<VecElemU16, false, 1>;
    using VecOperandI16 = VecOperand<VecElemI16, false, 1>;
    using VecOperandU32 = VecOperand<VecElemU32, false>;
    using VecOperandI32 = VecOperand<VecElemI32, false>;
    using VecOperandF32 = VecOperand<VecElemF32, false>;
    using VecOperandU64 = VecOperand<VecElemU64, false>;
    using VecOperandF64 = VecOperand<VecElemF64, false>;
    using VecOperandI64 = VecOperand<VecElemI64, false>;
    using VecOperandU96 = VecOperand<VecElemU32, false, 3>;
    using VecOperandU128 = VecOperand<VecElemU32, false, 4>;
    using VecOperandU256 = VecOperand<VecElemU32, false, 8>;
    using VecOperandU512 = VecOperand<VecElemU32, false, 16>;
    // non-writeable versions of vector operands
    using ConstVecOperandU8 = VecOperand<VecElemU8, true, 1>;
    using ConstVecOperandI8 = VecOperand<VecElemI8, true, 1>;
    using ConstVecOperandU16 = VecOperand<VecElemU16, true, 1>;
    using ConstVecOperandI16 = VecOperand<VecElemI16, true, 1>;
    using ConstVecOperandU32 = VecOperand<VecElemU32, true>;
    using ConstVecOperandI32 = VecOperand<VecElemI32, true>;
    using ConstVecOperandF32 = VecOperand<VecElemF32, true>;
    using ConstVecOperandU64 = VecOperand<VecElemU64, true>;
    using ConstVecOperandI64 = VecOperand<VecElemI64, true>;
    using ConstVecOperandF64 = VecOperand<VecElemF64, true>;
    using ConstVecOperandU96 = VecOperand<VecElemU32, true, 3>;
    using ConstVecOperandU128 = VecOperand<VecElemU32, true, 4>;
    using ConstVecOperandU256 = VecOperand<VecElemU32, true, 8>;
    using ConstVecOperandU512 = VecOperand<VecElemU32, true, 16>;
|
||||
}
|
||||
|
||||
#endif // __ARCH_VEGA_OPERAND_HH__
|
||||
245
src/arch/amdgpu/vega/registers.cc
Normal file
245
src/arch/amdgpu/vega/registers.cc
Normal file
@@ -0,0 +1,245 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "arch/amdgpu/vega/gpu_registers.hh"
|
||||
|
||||
namespace VegaISA
|
||||
{
|
||||
std::string
|
||||
opSelectorToRegSym(int idx, int numRegs)
|
||||
{
|
||||
std::string reg_sym;
|
||||
|
||||
// we have an SGPR
|
||||
if (idx <= REG_SGPR_MAX) {
|
||||
if (numRegs > 1)
|
||||
reg_sym = "s[" + std::to_string(idx) + ":" +
|
||||
std::to_string(idx + numRegs - 1) + "]";
|
||||
else
|
||||
reg_sym = "s" + std::to_string(idx);
|
||||
return reg_sym;
|
||||
} else if (idx >= REG_VGPR_MIN && idx <= REG_VGPR_MAX) {
|
||||
if (numRegs > 1)
|
||||
reg_sym = "v[" + std::to_string(idx - REG_VGPR_MIN) + ":" +
|
||||
std::to_string(idx - REG_VGPR_MIN + numRegs - 1) + "]";
|
||||
else
|
||||
reg_sym = "v" + std::to_string(idx - REG_VGPR_MIN);
|
||||
return reg_sym;
|
||||
} else if (idx >= REG_INT_CONST_POS_MIN &&
|
||||
idx <= REG_INT_CONST_POS_MAX) {
|
||||
reg_sym = std::to_string(idx - REG_INT_CONST_POS_MIN + 1);
|
||||
return reg_sym;
|
||||
} else if (idx >= REG_INT_CONST_NEG_MIN &&
|
||||
idx <= REG_INT_CONST_NEG_MAX) {
|
||||
int inline_val = -1 - (idx - REG_INT_CONST_NEG_MIN);
|
||||
reg_sym = std::to_string(inline_val);
|
||||
return reg_sym;
|
||||
}
|
||||
|
||||
switch (idx) {
|
||||
case REG_FLAT_SCRATCH_LO:
|
||||
reg_sym = "flat_scratch_lo";
|
||||
break;
|
||||
case REG_FLAT_SCRATCH_HI:
|
||||
reg_sym = "flat_scratch_hi";
|
||||
break;
|
||||
case REG_VCC_LO:
|
||||
reg_sym = "vcc";
|
||||
break;
|
||||
case REG_M0:
|
||||
reg_sym = "m0";
|
||||
break;
|
||||
case REG_EXEC_LO:
|
||||
reg_sym = "exec";
|
||||
break;
|
||||
case REG_ZERO:
|
||||
reg_sym = "0";
|
||||
break;
|
||||
case REG_POS_HALF:
|
||||
reg_sym = "0.5";
|
||||
break;
|
||||
case REG_NEG_HALF:
|
||||
reg_sym = "-0.5";
|
||||
break;
|
||||
case REG_POS_ONE:
|
||||
reg_sym = "1";
|
||||
break;
|
||||
case REG_NEG_ONE:
|
||||
reg_sym = "-1";
|
||||
break;
|
||||
case REG_POS_TWO:
|
||||
reg_sym = "2";
|
||||
break;
|
||||
case REG_NEG_TWO:
|
||||
reg_sym = "-2";
|
||||
break;
|
||||
case REG_POS_FOUR:
|
||||
reg_sym = "4";
|
||||
break;
|
||||
case REG_NEG_FOUR:
|
||||
reg_sym = "-4";
|
||||
break;
|
||||
default:
|
||||
fatal("VEGA ISA instruction has unknown register index %u\n", idx);
|
||||
break;
|
||||
}
|
||||
|
||||
return reg_sym;
|
||||
}
|
||||
|
||||
int
|
||||
opSelectorToRegIdx(int idx, int numScalarRegs)
|
||||
{
|
||||
int regIdx = -1;
|
||||
|
||||
if (idx <= REG_SGPR_MAX) {
|
||||
regIdx = idx;
|
||||
} else if (idx >= REG_VGPR_MIN && idx <= REG_VGPR_MAX) {
|
||||
regIdx = idx - REG_VGPR_MIN;
|
||||
} else if (idx == REG_VCC_LO) {
|
||||
/**
|
||||
* the VCC register occupies the two highest numbered
|
||||
* SRF entries. VCC is typically indexed by specifying
|
||||
* VCC_LO (simply called VCC) in the instruction encoding
|
||||
* and reading it as a 64b value so we only return the
|
||||
* index to the lower half of the VCC register.
|
||||
*
|
||||
* VCC_LO = s[NUM_SGPRS - 2]
|
||||
* VCC_HI = s[NUM_SGPRS - 1]
|
||||
*
|
||||
*/
|
||||
regIdx = numScalarRegs - 2;
|
||||
} else if (idx == REG_VCC_HI) {
|
||||
regIdx = numScalarRegs - 1;
|
||||
} else if (idx == REG_FLAT_SCRATCH_LO) {
|
||||
/**
|
||||
* the FLAT_SCRATCH register occupies the two SRF entries
|
||||
* just below VCC. FLAT_SCRATCH is typically indexed by
|
||||
* specifying FLAT_SCRATCH_LO (simply called FLAT_SCRATCH)
|
||||
* in the instruction encoding and reading it as a 64b value
|
||||
* so we only return the index to the lower half of the
|
||||
* FLAT_SCRATCH register.
|
||||
*
|
||||
* FLAT_SCRATCH_LO = s[NUM_SGPRS - 4]
|
||||
* FLAT_SCRATCH_HI = s[NUM_SGPRS - 3]
|
||||
*
|
||||
*/
|
||||
regIdx = numScalarRegs - 4;
|
||||
} else if (idx == REG_FLAT_SCRATCH_HI) {
|
||||
regIdx = numScalarRegs - 3;
|
||||
} else if (idx == REG_EXEC_LO || idx == REG_EXEC_HI) {
|
||||
/**
|
||||
* If the operand is the EXEC mask we just return the op
|
||||
* selector value indicating it is the EXEC mask, which is
|
||||
* not part of any RF. Higher-level calls will understand
|
||||
* that this resolves to a special system register, not an
|
||||
* index into an RF.
|
||||
*/
|
||||
return idx;
|
||||
}
|
||||
|
||||
return regIdx;
|
||||
}
|
||||
|
||||
bool
|
||||
isPosConstVal(int opIdx)
|
||||
{
|
||||
bool is_pos_const_val = (opIdx >= REG_INT_CONST_POS_MIN
|
||||
&& opIdx <= REG_INT_CONST_POS_MAX);
|
||||
|
||||
return is_pos_const_val;
|
||||
}
|
||||
|
||||
bool
|
||||
isNegConstVal(int opIdx)
|
||||
{
|
||||
bool is_neg_const_val = (opIdx >= REG_INT_CONST_NEG_MIN
|
||||
&& opIdx <= REG_INT_CONST_NEG_MAX);
|
||||
|
||||
return is_neg_const_val;
|
||||
}
|
||||
|
||||
bool
|
||||
isConstVal(int opIdx)
|
||||
{
|
||||
bool is_const_val = isPosConstVal(opIdx) || isNegConstVal(opIdx);
|
||||
return is_const_val;
|
||||
}
|
||||
|
||||
bool
|
||||
isLiteral(int opIdx)
|
||||
{
|
||||
return opIdx == REG_SRC_LITERAL;
|
||||
}
|
||||
|
||||
bool
|
||||
isExecMask(int opIdx)
|
||||
{
|
||||
return opIdx == REG_EXEC_LO || opIdx == REG_EXEC_HI;
|
||||
}
|
||||
|
||||
bool
|
||||
isVccReg(int opIdx)
|
||||
{
|
||||
return opIdx == REG_VCC_LO || opIdx == REG_VCC_HI;
|
||||
}
|
||||
|
||||
bool
|
||||
isFlatScratchReg(int opIdx)
|
||||
{
|
||||
return opIdx == REG_FLAT_SCRATCH_LO || opIdx == REG_FLAT_SCRATCH_HI;
|
||||
}
|
||||
|
||||
bool
|
||||
isScalarReg(int opIdx)
|
||||
{
|
||||
// FLAT_SCRATCH and VCC are stored in an SGPR pair
|
||||
if (opIdx <= REG_SGPR_MAX || opIdx == REG_FLAT_SCRATCH_LO ||
|
||||
opIdx == REG_FLAT_SCRATCH_HI || opIdx == REG_VCC_LO ||
|
||||
opIdx == REG_VCC_HI) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
isVectorReg(int opIdx)
|
||||
{
|
||||
if (opIdx >= REG_VGPR_MIN && opIdx <= REG_VGPR_MAX)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace VegaISA
|
||||
Reference in New Issue
Block a user